From c4b7b2e97b9941313eab4dfdf1668c112b8b1320 Mon Sep 17 00:00:00 2001 From: Lucas Date: Thu, 12 Sep 2024 14:54:27 -0400 Subject: [PATCH 01/11] add radix tree benchmark --- CMakeLists.txt | 2 ++ external/CMakeLists.txt | 23 +++++++++++++++++--- script/bench.py | 2 +- src/bliss/bench_rax.h | 47 +++++++++++++++++++++++++++++++++++++++++ src/bliss_bench.cpp | 7 +++++- 5 files changed, 76 insertions(+), 5 deletions(-) create mode 100644 src/bliss/bench_rax.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a93347..be7178b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,7 @@ add_library(bliss OBJECT ${CMAKE_SOURCE_DIR}/src/bliss/bliss_index.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_lipp.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_alex.h + ${CMAKE_SOURCE_DIR}/src/bliss/bench_rax.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_btree.h ) @@ -57,6 +58,7 @@ target_link_libraries(bliss PUBLIC alex lipp tlx + rax ) target_include_directories(bliss PUBLIC diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 3531b2f..7cfe73d 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -31,6 +31,21 @@ FetchContent_MakeAvailable(cxxopts) +FetchContent_Declare( + rax + GIT_REPOSITORY https://github.com/antirez/rax + GIT_TAG master +) +FetchContent_GetProperties(rax) +if (NOT rax_POPULATED) + FetchContent_Populate(rax) +endif() + +add_library(rax INTERFACE) +target_include_directories(rax INTERFACE ${rax_SOURCE_DIR}) + + + FetchContent_Declare( alex GIT_REPOSITORY https://github.com/microsoft/ALEX.git @@ -59,10 +74,12 @@ endif() add_library(lipp INTERFACE) target_include_directories(lipp INTERFACE ${lipp_SOURCE_DIR}/src/core) + + FetchContent_Declare( - tlx - GIT_REPOSITORY https://github.com/tlx/tlx.git - GIT_TAG master + tlx + GIT_REPOSITORY https://github.com/tlx/tlx.git + GIT_TAG master ) FetchContent_GetProperties(tlx) if (NOT tlx_POPULATED) diff --git a/script/bench.py b/script/bench.py index efaac72..b380d78 100755 --- a/script/bench.py +++ 
b/script/bench.py @@ -7,7 +7,7 @@ from infra.pybliss import BlissArgs, PyBliss from infra.util import get_file_params -INDEXES = ["btree"] +INDEXES = ["btree", "radix_tree"] PRELOAD_FACTOR = 0.4 WRITE_FACTOR = 0.4 READ_FACTOR = 0.2 diff --git a/src/bliss/bench_rax.h b/src/bliss/bench_rax.h new file mode 100644 index 0000000..83416b0 --- /dev/null +++ b/src/bliss/bench_rax.h @@ -0,0 +1,47 @@ +#ifndef BLISS_BENCH_RAX +#define BLISS_BENCH_RAX + +#include + +#include "bliss/bliss_index.h" + +#include "rax.h" + + +namespace bliss { + +template +class BlissRaxIndex : public BlissIndex { + public: + rax* _index; + BlissRaxIndex() { + _index = raxNew(); + }; + + void bulkload( + std::vector> values) override { + // expects the pairs to be pre-sorted before performing bulk load + for (const auto& pair : values) { + put(pair.first, pair.second); + } + } + + bool get(KEY_TYPE key) override { + std::string keyStr = std::to_string(key); + void* result = raxFind(_index, (unsigned char*)keyStr.c_str(), keyStr.size()); + return (result == raxNotFound) ? 
false : true; + } + + void put(KEY_TYPE key, VALUE_TYPE value) override { + std::string keyStr = std::to_string(key); + raxInsert(_index, (unsigned char*)keyStr.c_str(), keyStr.size(), (void*)new VALUE_TYPE(value), NULL); + } + + void end_routine() override { + raxFree(_index); + } +}; + +} // namespace bliss + +#endif // !BLISS_BENCH_RAX diff --git a/src/bliss_bench.cpp b/src/bliss_bench.cpp index d00d8a0..d688712 100644 --- a/src/bliss_bench.cpp +++ b/src/bliss_bench.cpp @@ -1,5 +1,7 @@ #include #include + +#include "rax.h" #include #include @@ -9,6 +11,7 @@ #include "bliss/bench_alex.h" #include "bliss/bench_btree.h" #include "bliss/bench_lipp.h" +#include "bliss/bench_rax.h" #include "bliss/bliss_index.h" #include "bliss/util/reader.h" #include "bliss/util/timer.h" @@ -49,7 +52,7 @@ BlissConfig parse_args(int argc, char *argv[]) { cxxopts::value()->default_value("0"))( "v,verbosity", "Verbosity [0: Info| 1: Debug | 2: Trace]", cxxopts::value()->default_value("0")->implicit_value("1"))( - "i,index", "Index type [alex | lipp | btree | bepstree | lsm]", + "i,index", "Index type [alex | lipp | btree | bepstree | lsm | radix_tree]", cxxopts::value()->default_value("btree"))( "file_type", "Input file type [binary | txt]", cxxopts::value()->default_value("txt"))( @@ -264,6 +267,8 @@ int main(int argc, char *argv[]) { index.reset(new bliss::BlissLippIndex()); } else if (config.index == "btree") { index.reset(new bliss::BlissBTreeIndex()); + } else if (config.index == "radix_tree") { + index.reset(new bliss::BlissRaxIndex()); } else { spdlog::error("{} not implemented yet", config.index); } From 1eb0c979b263fab171532ade1b862dba182cd290 Mon Sep 17 00:00:00 2001 From: Lucas Date: Mon, 23 Sep 2024 01:17:49 -0400 Subject: [PATCH 02/11] adding ART --- .gitignore | 2 ++ CMakeLists.txt | 4 ++-- data/example.data | 2 +- external/CMakeLists.txt | 16 +++++++------- script/bench.py | 3 +-- script/infra/pybliss.py | 1 + src/bliss/bench_ART.h | 49 
+++++++++++++++++++++++++++++++++++++++++ src/bliss/bench_rax.h | 47 --------------------------------------- src/bliss_bench.cpp | 14 +++++++----- 9 files changed, 72 insertions(+), 66 deletions(-) create mode 100644 src/bliss/bench_ART.h delete mode 100644 src/bliss/bench_rax.h diff --git a/.gitignore b/.gitignore index af9eeda..e054749 100644 --- a/.gitignore +++ b/.gitignore @@ -201,3 +201,5 @@ cmake-build-debug/ src/bliss/.idea/ db_working_home + +.DS_Store diff --git a/CMakeLists.txt b/CMakeLists.txt index be7178b..5baeb25 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,8 +44,8 @@ add_library(bliss OBJECT ${CMAKE_SOURCE_DIR}/src/bliss/bliss_index.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_lipp.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_alex.h - ${CMAKE_SOURCE_DIR}/src/bliss/bench_rax.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_btree.h + ${CMAKE_SOURCE_DIR}/src/bliss/bench_ART.h ) target_compile_features(bliss PUBLIC @@ -58,7 +58,7 @@ target_link_libraries(bliss PUBLIC alex lipp tlx - rax + art ) target_include_directories(bliss PUBLIC diff --git a/data/example.data b/data/example.data index 90bce25..c7dab99 100644 --- a/data/example.data +++ b/data/example.data @@ -997,4 +997,4 @@ 2991 2994 2997 -3000 +3000 \ No newline at end of file diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 7cfe73d..0e920c3 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -32,17 +32,17 @@ FetchContent_MakeAvailable(cxxopts) FetchContent_Declare( - rax - GIT_REPOSITORY https://github.com/antirez/rax - GIT_TAG master + art + GIT_REPOSITORY https://github.com/BU-DiSC/ART + GIT_TAG main ) -FetchContent_GetProperties(rax) -if (NOT rax_POPULATED) - FetchContent_Populate(rax) +FetchContent_GetProperties(art) +if (NOT art_POPULATED) + FetchContent_Populate(art) endif() -add_library(rax INTERFACE) -target_include_directories(rax INTERFACE ${rax_SOURCE_DIR}) +add_library(art INTERFACE) +target_include_directories(art INTERFACE ${art_SOURCE_DIR}) diff --git 
a/script/bench.py b/script/bench.py index b380d78..4df9942 100755 --- a/script/bench.py +++ b/script/bench.py @@ -7,7 +7,7 @@ from infra.pybliss import BlissArgs, PyBliss from infra.util import get_file_params -INDEXES = ["btree", "radix_tree"] +INDEXES = ["btree", "radix_tree", "ART"] PRELOAD_FACTOR = 0.4 WRITE_FACTOR = 0.4 READ_FACTOR = 0.2 @@ -77,7 +77,6 @@ def main(args): ) args = parser.parse_args() - log_level = logging.WARNING if args.verbose == 1: log_level = logging.INFO diff --git a/script/infra/pybliss.py b/script/infra/pybliss.py index 9cc4c53..f2c3802 100644 --- a/script/infra/pybliss.py +++ b/script/infra/pybliss.py @@ -68,6 +68,7 @@ def run_single_bliss_bench(self, args: BlissArgs) -> BlissStats: f"--file_type {'binary' if args.file_type else 'txt'}", "--use_preload" if args.use_preload else "", ] + print("this is cmd: " , " ".join(cmd)) process = subprocess.Popen( " ".join(cmd), stdout=subprocess.PIPE, diff --git a/src/bliss/bench_ART.h b/src/bliss/bench_ART.h new file mode 100644 index 0000000..293871d --- /dev/null +++ b/src/bliss/bench_ART.h @@ -0,0 +1,49 @@ +#ifndef BLISS_BENCH_ART +#define BLISS_BENCH_ART + +#include + +#include + +#include "bliss/bliss_index.h" +#include "spdlog/spdlog.h" + + +namespace bliss { + +template +class BlissARTIndex : public BlissIndex { + public: + ART::Node* _index; + BlissARTIndex() { + _index = nullptr; + }; + + void bulkload( + std::vector> values) override { + // expects the pairs to be pre-sorted before performing bulk load + for (const auto& pair : values) { + put(pair.first, pair.second); + } + } + + bool get(KEY_TYPE key) override { + uint8_t ARTkey[8]; + ART::loadKey(key, ARTkey); + + ART::Node* leaf = ART::lookup(_index, ARTkey, 8, 0, 8); + return ART::isLeaf(leaf) && ART::getLeafValue(leaf) == key; + } + + void put(KEY_TYPE key, VALUE_TYPE value) override { + uint8_t ARTkey[8]; + ART::loadKey(key, ARTkey); + ART::insert(_index, &_index, ARTkey, 0, key, 8); + } + + void end_routine() override {} +}; + +} 
// namespace bliss + +#endif // !BLISS_BENCH_ART diff --git a/src/bliss/bench_rax.h b/src/bliss/bench_rax.h deleted file mode 100644 index 83416b0..0000000 --- a/src/bliss/bench_rax.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef BLISS_BENCH_RAX -#define BLISS_BENCH_RAX - -#include - -#include "bliss/bliss_index.h" - -#include "rax.h" - - -namespace bliss { - -template -class BlissRaxIndex : public BlissIndex { - public: - rax* _index; - BlissRaxIndex() { - _index = raxNew(); - }; - - void bulkload( - std::vector> values) override { - // expects the pairs to be pre-sorted before performing bulk load - for (const auto& pair : values) { - put(pair.first, pair.second); - } - } - - bool get(KEY_TYPE key) override { - std::string keyStr = std::to_string(key); - void* result = raxFind(_index, (unsigned char*)keyStr.c_str(), keyStr.size()); - return (result == raxNotFound) ? false : true; - } - - void put(KEY_TYPE key, VALUE_TYPE value) override { - std::string keyStr = std::to_string(key); - raxInsert(_index, (unsigned char*)keyStr.c_str(), keyStr.size(), (void*)new VALUE_TYPE(value), NULL); - } - - void end_routine() override { - raxFree(_index); - } -}; - -} // namespace bliss - -#endif // !BLISS_BENCH_RAX diff --git a/src/bliss_bench.cpp b/src/bliss_bench.cpp index d688712..87a74f0 100644 --- a/src/bliss_bench.cpp +++ b/src/bliss_bench.cpp @@ -1,7 +1,6 @@ #include #include - -#include "rax.h" +#include #include #include @@ -11,7 +10,8 @@ #include "bliss/bench_alex.h" #include "bliss/bench_btree.h" #include "bliss/bench_lipp.h" -#include "bliss/bench_rax.h" +#include "bliss/bench_ART.h" + #include "bliss/bliss_index.h" #include "bliss/util/reader.h" #include "bliss/util/timer.h" @@ -52,7 +52,7 @@ BlissConfig parse_args(int argc, char *argv[]) { cxxopts::value()->default_value("0"))( "v,verbosity", "Verbosity [0: Info| 1: Debug | 2: Trace]", cxxopts::value()->default_value("0")->implicit_value("1"))( - "i,index", "Index type [alex | lipp | btree | bepstree | lsm | 
radix_tree]", + "i,index", "Index type [alex | lipp | btree | bepstree | lsm | ART]", cxxopts::value()->default_value("btree"))( "file_type", "Input file type [binary | txt]", cxxopts::value()->default_value("txt"))( @@ -255,11 +255,13 @@ int main(int argc, char *argv[]) { } else { data = bliss::read_file(config.data_file.c_str()); } + spdlog::debug("data.at(0) = {}", data.at(0)); spdlog::debug("data.at({}) = {}", data.size() - 1, data.at(data.size() - 1)); std::unique_ptr> index; + // Call the respective function based on the index value if (config.index == "alex") { index.reset(new bliss::BlissAlexIndex()); @@ -267,8 +269,8 @@ int main(int argc, char *argv[]) { index.reset(new bliss::BlissLippIndex()); } else if (config.index == "btree") { index.reset(new bliss::BlissBTreeIndex()); - } else if (config.index == "radix_tree") { - index.reset(new bliss::BlissRaxIndex()); + } else if (config.index == "ART") { + index.reset(new bliss::BlissARTIndex()); } else { spdlog::error("{} not implemented yet", config.index); } From 97b6cd8d3b1eaf0c7813f2a26e32e43c8b4eb175 Mon Sep 17 00:00:00 2001 From: Lucas Date: Mon, 23 Sep 2024 22:46:08 -0400 Subject: [PATCH 03/11] reflecting comments --- CMakeLists.txt | 2 +- script/bench.py | 2 +- script/infra/pybliss.py | 1 - src/bliss/bench_ART.h | 14 +++++++++----- src/bliss_bench.cpp | 9 ++++----- 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5baeb25..8bf51b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,7 +45,7 @@ add_library(bliss OBJECT ${CMAKE_SOURCE_DIR}/src/bliss/bench_lipp.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_alex.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_btree.h - ${CMAKE_SOURCE_DIR}/src/bliss/bench_ART.h + ${CMAKE_SOURCE_DIR}/src/bliss/bench_art.h ) target_compile_features(bliss PUBLIC diff --git a/script/bench.py b/script/bench.py index 4df9942..9e758ef 100755 --- a/script/bench.py +++ b/script/bench.py @@ -7,7 +7,7 @@ from infra.pybliss import BlissArgs, PyBliss 
from infra.util import get_file_params -INDEXES = ["btree", "radix_tree", "ART"] +INDEXES = ["btree", "art"] PRELOAD_FACTOR = 0.4 WRITE_FACTOR = 0.4 READ_FACTOR = 0.2 diff --git a/script/infra/pybliss.py b/script/infra/pybliss.py index f2c3802..9cc4c53 100644 --- a/script/infra/pybliss.py +++ b/script/infra/pybliss.py @@ -68,7 +68,6 @@ def run_single_bliss_bench(self, args: BlissArgs) -> BlissStats: f"--file_type {'binary' if args.file_type else 'txt'}", "--use_preload" if args.use_preload else "", ] - print("this is cmd: " , " ".join(cmd)) process = subprocess.Popen( " ".join(cmd), stdout=subprocess.PIPE, diff --git a/src/bliss/bench_ART.h b/src/bliss/bench_ART.h index 293871d..c44ed3a 100644 --- a/src/bliss/bench_ART.h +++ b/src/bliss/bench_ART.h @@ -1,7 +1,7 @@ #ifndef BLISS_BENCH_ART #define BLISS_BENCH_ART -#include +#include "ART.h" #include @@ -14,6 +14,7 @@ namespace bliss { template class BlissARTIndex : public BlissIndex { public: + static constexpr size_t KEY_SIZE = sizeof(KEY_TYPE); ART::Node* _index; BlissARTIndex() { _index = nullptr; @@ -28,17 +29,20 @@ class BlissARTIndex : public BlissIndex { } bool get(KEY_TYPE key) override { - uint8_t ARTkey[8]; + uint8_t ARTkey[KEY_SIZE]; ART::loadKey(key, ARTkey); - ART::Node* leaf = ART::lookup(_index, ARTkey, 8, 0, 8); + uint8_t depth = 0; + ART::Node* leaf = ART::lookup(_index, ARTkey, KEY_SIZE, depth, KEY_SIZE); return ART::isLeaf(leaf) && ART::getLeafValue(leaf) == key; } void put(KEY_TYPE key, VALUE_TYPE value) override { - uint8_t ARTkey[8]; + uint8_t ARTkey[KEY_SIZE]; ART::loadKey(key, ARTkey); - ART::insert(_index, &_index, ARTkey, 0, key, 8); + + uint8_t depth = 0; + ART::insert(_index, &_index, ARTkey, depth, key, KEY_SIZE); } void end_routine() override {} diff --git a/src/bliss_bench.cpp b/src/bliss_bench.cpp index 87a74f0..e755d3a 100644 --- a/src/bliss_bench.cpp +++ b/src/bliss_bench.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include "ART.h" #include #include @@ -10,8 +10,7 @@ #include 
"bliss/bench_alex.h" #include "bliss/bench_btree.h" #include "bliss/bench_lipp.h" -#include "bliss/bench_ART.h" - +#include "bliss/bench_art.h" #include "bliss/bliss_index.h" #include "bliss/util/reader.h" #include "bliss/util/timer.h" @@ -52,7 +51,7 @@ BlissConfig parse_args(int argc, char *argv[]) { cxxopts::value()->default_value("0"))( "v,verbosity", "Verbosity [0: Info| 1: Debug | 2: Trace]", cxxopts::value()->default_value("0")->implicit_value("1"))( - "i,index", "Index type [alex | lipp | btree | bepstree | lsm | ART]", + "i,index", "Index type [alex | lipp | btree | bepstree | lsm | art]", cxxopts::value()->default_value("btree"))( "file_type", "Input file type [binary | txt]", cxxopts::value()->default_value("txt"))( @@ -269,7 +268,7 @@ int main(int argc, char *argv[]) { index.reset(new bliss::BlissLippIndex()); } else if (config.index == "btree") { index.reset(new bliss::BlissBTreeIndex()); - } else if (config.index == "ART") { + } else if (config.index == "art") { index.reset(new bliss::BlissARTIndex()); } else { spdlog::error("{} not implemented yet", config.index); From 772bbb92267ef548b7f162bcbbceee152dfb08e5 Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 25 Sep 2024 18:04:23 -0400 Subject: [PATCH 04/11] reflecting comments --- src/bliss/bench_ART.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/bliss/bench_ART.h b/src/bliss/bench_ART.h index c44ed3a..dbe5030 100644 --- a/src/bliss/bench_ART.h +++ b/src/bliss/bench_ART.h @@ -31,7 +31,6 @@ class BlissARTIndex : public BlissIndex { bool get(KEY_TYPE key) override { uint8_t ARTkey[KEY_SIZE]; ART::loadKey(key, ARTkey); - uint8_t depth = 0; ART::Node* leaf = ART::lookup(_index, ARTkey, KEY_SIZE, depth, KEY_SIZE); return ART::isLeaf(leaf) && ART::getLeafValue(leaf) == key; @@ -42,7 +41,7 @@ class BlissARTIndex : public BlissIndex { ART::loadKey(key, ARTkey); uint8_t depth = 0; - ART::insert(_index, &_index, ARTkey, depth, key, KEY_SIZE); + ART::insert(_index, &_index, ARTkey, 
depth, value, KEY_SIZE); } void end_routine() override {} From c23b9df368430672f3836c81cadc7b604f81eec4 Mon Sep 17 00:00:00 2001 From: Lucas Date: Thu, 26 Sep 2024 15:11:09 -0400 Subject: [PATCH 05/11] utilizing value size --- src/bliss/bench_ART.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bliss/bench_ART.h b/src/bliss/bench_ART.h index dbe5030..92db384 100644 --- a/src/bliss/bench_ART.h +++ b/src/bliss/bench_ART.h @@ -15,6 +15,7 @@ template class BlissARTIndex : public BlissIndex { public: static constexpr size_t KEY_SIZE = sizeof(KEY_TYPE); + static constexpr size_t VALUE_SIZE = sizeof(KEY_TYPE); ART::Node* _index; BlissARTIndex() { _index = nullptr; @@ -41,7 +42,7 @@ class BlissARTIndex : public BlissIndex { ART::loadKey(key, ARTkey); uint8_t depth = 0; - ART::insert(_index, &_index, ARTkey, depth, value, KEY_SIZE); + ART::insert(_index, &_index, ARTkey, depth, value, VALUE_SIZE); } void end_routine() override {} From 6be89ef690cb4eabe50523d6795c4cdfd63a4389 Mon Sep 17 00:00:00 2001 From: Aneesh Raman Date: Thu, 26 Sep 2024 13:25:45 -0400 Subject: [PATCH 06/11] [Feature] Adding unit tests for indexes (#31) * [Refactor] moved util code to separate header files * [FEAT] added unit tests * Added detailed instructions on integrating new index to readme * minor refactor to define * minor refactor to header defs --- CMakeLists.txt | 6 ++ README.md | 48 ++++++++- src/bliss/util/args.h | 65 ++++++++++++ src/bliss/util/config.h | 38 +++++++ src/bliss/util/execute.h | 44 ++++++++ src/bliss_bench.cpp | 168 +++++++------------------------ tests/CMakeLists.txt | 19 ++++ tests/bliss_index_tests.h | 46 +++++++++ tests/test_alex/CMakeLists.txt | 10 ++ tests/test_alex/alex_tests.cpp | 30 ++++++ tests/test_btree/CMakeLists.txt | 9 ++ tests/test_btree/btree_tests.cpp | 31 ++++++ tests/test_lipp/CMakeLists.txt | 9 ++ tests/test_lipp/lipp_tests.cpp | 31 ++++++ 14 files changed, 421 insertions(+), 133 deletions(-) create mode 100644 src/bliss/util/args.h 
create mode 100644 src/bliss/util/config.h create mode 100644 src/bliss/util/execute.h create mode 100644 tests/CMakeLists.txt create mode 100644 tests/bliss_index_tests.h create mode 100644 tests/test_alex/CMakeLists.txt create mode 100644 tests/test_alex/alex_tests.cpp create mode 100644 tests/test_btree/CMakeLists.txt create mode 100644 tests/test_btree/btree_tests.cpp create mode 100644 tests/test_lipp/CMakeLists.txt create mode 100644 tests/test_lipp/lipp_tests.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bf51b5..e262c69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,8 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) +enable_testing() + if(CMAKE_BUILD_TYPE STREQUAL Debug) ADD_DEFINITIONS(-DDEBUG) endif() @@ -34,6 +36,8 @@ endif() # ============================================================================= add_subdirectory(external) +add_subdirectory(tests) + # ============================================================================= # HEADER bliss # Bliss lib files @@ -41,6 +45,8 @@ add_subdirectory(external) add_library(bliss OBJECT ${CMAKE_SOURCE_DIR}/src/bliss/util/timer.h ${CMAKE_SOURCE_DIR}/src/bliss/util/reader.h + ${CMAKE_SOURCE_DIR}/src/bliss/util/args.h + ${CMAKE_SOURCE_DIR}/src/bliss/util/config.h ${CMAKE_SOURCE_DIR}/src/bliss/bliss_index.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_lipp.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_alex.h diff --git a/README.md b/README.md index 4e66773..16dce95 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# bliss_benchmark +# BLISS Benchmark The purpose of this program is to benchmark the sortedness performance on various indexes.\ This research project is part of the [Data-intensive Systems and Computing (DiSC) lab](https://disc.bu.edu/) at Boston University. 
@@ -36,3 +36,49 @@ The program currently accepts the following parameters: -v, --verbosity [=arg(=1)] Verbosity [0: Info| 1: Debug | 2: Trace] (default: 0) -i, --index arg Index type (alex|lipp) (default: btree) ``` + +## Contributing to this Project +If you are interested in contributing to this benchmarking effort, +please reach out to [Aneesh Raman](mailto:aneeshr@bu.edu) / [Andy Huynh](mailto:ndhuynh@bu.edu) / [Manos Athanassoulis](mailto:mathan@bu.edu). + +### Integrating a New Index +We primarily import indexes as CMake libraries, before integrating them into the benchmarking framework. + +#### Importing using CMake +Import the library in `external/CMakeLists.txt`. Then, link the library to the `bliss` target in the `CMakeLists.txt` file in the root project directory. + +#### Building the Adapter +Every index in the framework uses an adapter to interact with the benchmark. These adapters are found under `src/bliss`. + +- The abstract class for the adapter is found at `src/bliss/bliss_index.h`. +- Add the adapter code for the new index `abc` in its own file called `bench_abc.h` under `src/bliss`. + +#### Adding to the Benchmark +The benchmark code is found at `bliss_bench.cpp`. To add the index to the benchmark: + +- Include the relevant header file, e.g., `#include "bliss/bench_abc.h"`. +- In the `main()` function, add the additional condition when checking `config.index` for parsing the new index. + +#### Adding Unit Tests +Currently, we support basic unit tests with `put()` and `get()` operations in the benchmark. + +For the newly integrated index (e.g., `abc`), add relevant unit tests under the `tests/` folder. +- Create a new directory under `tests/` for the index `abc` by prefixing the folder with `test_*` (i.e., `mkdir tests/test_abc`). +- Each index folder gets its own `CMakeLists.txt` file that will link with the outer `tests/CMakeLists.txt` file. 
+- Copy the `CMakeLists.txt` file from one of the existing indexes into `tests/test_abc` (i.e., `cp tests/test_btree/CMakeLists.txt tests/test_abc/`). +- Modify `tests/CMakeLists.txt` to include the new subdirectory (i.e., add a new line with `add_subdirectory(test_abc)`). + +You can create one or multiple cpp files under `tests/test_abc/` for your unit tests. + +- Name every unit test file prefixed with the index name (e.g., `abc_tests.cpp`). +- Include the header file `bliss_index_tests.h` in your test file to import common util code. + +**You may refer to `tests/test_btree/btree_tests.cpp` for samples.** + +## Issues & Additional Information +You may report bugs/issues directly on GitHub [here](https://github.com/BU-DiSC/bliss_benchmark/issues). + +For additional information, contact: +- [Aneesh Raman](mailto:aneeshr@bu.edu) +- [Andy Huynh](mailto:ndhuynh@bu.edu) +- [Manos Athanassoulis](mailto:mathan@bu.edu) diff --git a/src/bliss/util/args.h b/src/bliss/util/args.h new file mode 100644 index 0000000..cbebc83 --- /dev/null +++ b/src/bliss/util/args.h @@ -0,0 +1,65 @@ +#ifndef BLISS_ARGS_H +#define BLISS_ARGS_H +#include +#include +#include + +#include "bliss/util/config.h" + +using namespace bliss::utils::config; + +namespace bliss { +namespace utils { +namespace args { +BlissConfig parse_args(int argc, char *argv[]) { + BlissConfig config; + cxxopts::Options options( + "bliss", "BLISS: Benchmarking Learned Index Structures for Sortedness"); + + try { + options.add_options()("d,data_file", "Path to the data file", + cxxopts::value())( + "p,preload_factor", "Preload factor", + cxxopts::value()->default_value("0.5"))( + "w,write_factor", "Write factor", + cxxopts::value()->default_value("0.25"))( + "r,read_factor", "Read factor", + cxxopts::value()->default_value("0.1"))( + "m,mixed_read_write_ratio", "Read write ratio", + cxxopts::value()->default_value("0.5"))( + "s,seed", "Random Seed value", + cxxopts::value()->default_value("0"))( + "v,verbosity", "Verbosity [0: Info| 1: Debug | 2: 
Trace]", + cxxopts::value()->default_value("0")->implicit_value("1"))( + "i,index", "Index type [alex | lipp | btree | bepstree | lsm]", + cxxopts::value()->default_value("btree"))( + "file_type", "Input file type [binary | txt]", + cxxopts::value()->default_value("txt"))( + "use_preload", "Use index defined preload", + cxxopts::value()->default_value("false")); + + auto result = options.parse(argc, argv); + config = { + .data_file = result["data_file"].as(), + .preload_factor = result["preload_factor"].as(), + .write_factor = result["write_factor"].as(), + .read_factor = result["read_factor"].as(), + .mixed_read_write_ratio = + result["mixed_read_write_ratio"].as(), + .seed = result["seed"].as(), + .verbosity = result["verbosity"].as(), + .index = result["index"].as(), + .file_type = result["file_type"].as(), + .use_preload = result["use_preload"].as(), + }; + } catch (const std::exception &e) { + std::cerr << "Error: " << e.what() << std::endl; + std::cerr << options.help() << std::endl; + exit(1); + } + return config; +} +} // namespace args +} // namespace utils +} // namespace bliss +#endif \ No newline at end of file diff --git a/src/bliss/util/config.h b/src/bliss/util/config.h new file mode 100644 index 0000000..d3d56c6 --- /dev/null +++ b/src/bliss/util/config.h @@ -0,0 +1,38 @@ +#ifndef BLISS_CONFIG_H +#define BLISS_CONFIG_H + +#include + +#include + +namespace bliss { +namespace utils { +namespace config { +struct BlissConfig { + std::string data_file; + double preload_factor; + double write_factor; + double read_factor; + double mixed_read_write_ratio; + int seed; + int verbosity; + std::string index; + std::string file_type; + bool use_preload; +}; + +void display_config(BlissConfig config) { + spdlog::trace("Data File: {}", config.data_file); + spdlog::trace("Preload Factor: {}", config.preload_factor); + spdlog::trace("Write Factor: {}", config.write_factor); + spdlog::trace("Read Factor: {}", config.read_factor); + spdlog::trace("Read Write Ratio: 
{}", config.mixed_read_write_ratio); + spdlog::trace("Verbosity {}", config.verbosity); + spdlog::trace("Index: {}", config.index); + spdlog::trace("File type: {}", config.file_type); +} +} // namespace config +} // namespace utils +} // namespace bliss + +#endif \ No newline at end of file diff --git a/src/bliss/util/execute.h b/src/bliss/util/execute.h new file mode 100644 index 0000000..96fe823 --- /dev/null +++ b/src/bliss/util/execute.h @@ -0,0 +1,44 @@ +#ifndef BLISS_EXECUTE_H +#define BLISS_EXECUTE_H +#include +#include +#include + +#include "bliss/bliss_index.h" + +typedef unsigned long key_type; +typedef unsigned long value_type; + +namespace bliss { +namespace utils { +namespace executor { +void execute_inserts(bliss::BlissIndex &tree, + std::vector::iterator &start, + std::vector::iterator &end, int seed = 0) { + spdlog::trace("Executing Inserts"); + std::mt19937 gen(seed); + std::uniform_int_distribution dist(0, 1); + + auto num_keys = end - start; + for (auto &curr = start; curr != end; ++curr) { + tree.put(*curr, std::round(dist(gen) * num_keys)); + } +} + +void execute_non_empty_reads(bliss::BlissIndex &tree, + std::vector &data, int num_reads, + int seed = 0) { + std::mt19937 gen(seed); + std::uniform_int_distribution dist(0, 1); + + size_t key_idx; + for (auto blank = 0; blank < num_reads; blank++) { + key_idx = std::round(dist(gen) * (data.size() - 1)); + tree.get(data.at(key_idx)); + } +} + +} // namespace executor +} // namespace utils +} // namespace bliss +#endif \ No newline at end of file diff --git a/src/bliss_bench.cpp b/src/bliss_bench.cpp index e755d3a..14ed30c 100644 --- a/src/bliss_bench.cpp +++ b/src/bliss_bench.cpp @@ -12,118 +12,13 @@ #include "bliss/bench_lipp.h" #include "bliss/bench_art.h" #include "bliss/bliss_index.h" +#include "bliss/util/args.h" +#include "bliss/util/config.h" +#include "bliss/util/execute.h" #include "bliss/util/reader.h" #include "bliss/util/timer.h" -typedef unsigned long key_type; -typedef unsigned long 
value_type; - -struct BlissConfig { - std::string data_file; - double preload_factor; - double write_factor; - double read_factor; - double mixed_read_write_ratio; - int seed; - int verbosity; - std::string index; - std::string file_type; - bool use_preload; -}; - -BlissConfig parse_args(int argc, char *argv[]) { - BlissConfig config; - cxxopts::Options options( - "bliss", "BLISS: Benchmarking Learned Index Structures for Sortedness"); - - try { - options.add_options()("d,data_file", "Path to the data file", - cxxopts::value())( - "p,preload_factor", "Preload factor", - cxxopts::value()->default_value("0.5"))( - "w,write_factor", "Write factor", - cxxopts::value()->default_value("0.25"))( - "r,read_factor", "Read factor", - cxxopts::value()->default_value("0.1"))( - "m,mixed_read_write_ratio", "Read write ratio", - cxxopts::value()->default_value("0.5"))( - "s,seed", "Random Seed value", - cxxopts::value()->default_value("0"))( - "v,verbosity", "Verbosity [0: Info| 1: Debug | 2: Trace]", - cxxopts::value()->default_value("0")->implicit_value("1"))( - "i,index", "Index type [alex | lipp | btree | bepstree | lsm | art]", - cxxopts::value()->default_value("btree"))( - "file_type", "Input file type [binary | txt]", - cxxopts::value()->default_value("txt"))( - "use_preload", "Use index defined preload", - cxxopts::value()->default_value("false")); - - auto result = options.parse(argc, argv); - config = { - .data_file = result["data_file"].as(), - .preload_factor = result["preload_factor"].as(), - .write_factor = result["write_factor"].as(), - .read_factor = result["read_factor"].as(), - .mixed_read_write_ratio = - result["mixed_read_write_ratio"].as(), - .seed = result["seed"].as(), - .verbosity = result["verbosity"].as(), - .index = result["index"].as(), - .file_type = result["file_type"].as(), - .use_preload = result["use_preload"].as(), - }; - } catch (const std::exception &e) { - std::cerr << "Error: " << e.what() << std::endl; - std::cerr << options.help() << 
std::endl; - exit(1); - } - return config; -} - -void display_config(BlissConfig config) { - spdlog::trace("Data File: {}", config.data_file); - spdlog::trace("Preload Factor: {}", config.preload_factor); - spdlog::trace("Write Factor: {}", config.write_factor); - spdlog::trace("Read Factor: {}", config.read_factor); - spdlog::trace("Read Write Ratio: {}", config.mixed_read_write_ratio); - spdlog::trace("Verbosity {}", config.verbosity); - spdlog::trace("Index: {}", config.index); - spdlog::trace("File type: {}", config.file_type); -} - -void execute_non_empty_reads(bliss::BlissIndex &tree, - std::vector &data, int num_reads, - int seed = 0) { - std::mt19937 gen(seed); - std::uniform_int_distribution dist(0, 1); - - size_t key_idx; - for (auto blank = 0; blank < num_reads; blank++) { - key_idx = std::round(dist(gen) * (data.size() - 1)); - tree.get(data.at(key_idx)); - } -} - -std::vector> create_preload_vec( - std::vector::iterator &start, - std::vector::iterator &end, bool sort_values = true, - int value_generator_seed = 0) { - std::mt19937 gen(value_generator_seed); - std::uniform_int_distribution dist(0, 2 << 16); - std::vector> vec; - - if (sort_values) { - spdlog::trace("Sorting values"); - std::stable_sort(start, end); - } - - spdlog::trace("Creating key-value pairs"); - for (auto curr = start; curr != end; ++curr) { - vec.push_back(std::make_pair(*curr, dist(gen))); - } - - return vec; -} +using namespace bliss::utils; void execute_bulkload(bliss::BlissIndex &tree, std::vector> &values) { @@ -131,19 +26,6 @@ void execute_bulkload(bliss::BlissIndex &tree, tree.bulkload(values); } -void execute_inserts(bliss::BlissIndex &tree, - std::vector::iterator &start, - std::vector::iterator &end, int seed = 0) { - spdlog::trace("Executing Inserts"); - std::mt19937 gen(seed); - std::uniform_int_distribution dist(0, 1); - - auto num_keys = end - start; - for (auto &curr = start; curr != end; ++curr) { - tree.put(*curr, std::round(dist(gen) * num_keys)); - } -} - void 
execute_mixed_workload(bliss::BlissIndex &tree, std::vector::iterator &start, std::vector::iterator &end, @@ -172,10 +54,30 @@ void execute_mixed_workload(bliss::BlissIndex &tree, } } } +std::vector> create_preload_vec( + std::vector::iterator &start, + std::vector::iterator &end, bool sort_values = true, + int value_generator_seed = 0) { + std::mt19937 gen(value_generator_seed); + std::uniform_int_distribution dist(0, 2 << 16); + std::vector> vec; + + if (sort_values) { + spdlog::trace("Sorting values"); + std::stable_sort(start, end); + } + + spdlog::trace("Creating key-value pairs"); + for (auto curr = start; curr != end; ++curr) { + vec.push_back(std::make_pair(*curr, dist(gen))); + } + + return vec; +} void workload_executor(bliss::BlissIndex &tree, - std::vector &data, const BlissConfig &config, - const int seed) { + std::vector &data, + const config::BlissConfig &config, const int seed) { size_t num_inserts = data.size(); size_t num_preload = std::round(config.preload_factor * num_inserts); size_t num_writes = std::round(config.write_factor * data.size()); @@ -203,8 +105,9 @@ void workload_executor(bliss::BlissIndex &tree, std::chrono::duration_cast( std::chrono::high_resolution_clock::now() - start) .count(); - preload_time = time_function( - [&]() { execute_inserts(tree, preload_start, preload_end); }); + preload_time = time_function([&]() { + executor::execute_inserts(tree, preload_start, preload_end); + }); } spdlog::info("Preload Creation Time (ns): {}", preload_creation_time); spdlog::info("Preload Time (ns): {}", preload_time); @@ -213,8 +116,8 @@ void workload_executor(bliss::BlissIndex &tree, spdlog::debug("Writing {} items", num_writes); auto write_start = preload_end; auto write_end = write_start + num_writes; - auto write_time = - time_function([&]() { execute_inserts(tree, write_start, write_end); }); + auto write_time = time_function( + [&]() { executor::execute_inserts(tree, write_start, write_end); }); spdlog::info("Write Time (ns): {}", 
write_time); // Timing for mixed workloads running @@ -229,13 +132,14 @@ void workload_executor(bliss::BlissIndex &tree, // Timing for reads on index spdlog::debug("Reading {} items", num_reads); - auto read_time = time_function( - [&]() { execute_non_empty_reads(tree, data, num_reads, seed); }); + auto read_time = time_function([&]() { + executor::execute_non_empty_reads(tree, data, num_reads, seed); + }); spdlog::info("Read Time (ns): {}", read_time); } int main(int argc, char *argv[]) { - auto config = parse_args(argc, argv); + auto config = args::parse_args(argc, argv); switch (config.verbosity) { case 1: spdlog::set_level(spdlog::level::debug); @@ -271,7 +175,7 @@ int main(int argc, char *argv[]) { } else if (config.index == "art") { index.reset(new bliss::BlissARTIndex()); } else { - spdlog::error("{} not implemented yet", config.index); + spdlog::error(config.index + " not implemented yet", 1); } workload_executor(*index, data, config, 0); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..e916e4f --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,19 @@ +include(FetchContent) + +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.12.1 +) +FetchContent_MakeAvailable(googletest) + +add_library(bliss_test_infra OBJECT +${CMAKE_CURRENT_SOURCE_DIR}/bliss_index_tests.h) + +target_include_directories(bliss_test_infra PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} +) + +add_subdirectory(test_alex) +add_subdirectory(test_lipp) +add_subdirectory(test_btree) \ No newline at end of file diff --git a/tests/bliss_index_tests.h b/tests/bliss_index_tests.h new file mode 100644 index 0000000..edb14fa --- /dev/null +++ b/tests/bliss_index_tests.h @@ -0,0 +1,46 @@ +#ifndef BLISS_INDEX_TESTS_H +#define BLISS_INDEX_TESTS_H +#include +#include +#include +#include + +#include +#include +#include + +#include "bliss/bench_alex.h" +#include "bliss/bench_btree.h" +#include 
"bliss/bench_lipp.h" +#include "bliss/bliss_index.h" +#include "bliss/util/args.h" +#include "bliss/util/config.h" +#include "bliss/util/execute.h" +#include "bliss/util/reader.h" +#include "bliss/util/timer.h" + +using namespace bliss::utils; + +using key_type = unsigned long; +using value_type = unsigned long; + +class BlissIndexTest : public testing::Test { + protected: + std::unique_ptr> index; + std::string indexes[3] = {"alex", "lipp", "btree"}; + int num_keys = 100000; + + void SetUp() {} + + void GenerateData(std::vector &data, int num_keys, + bool sorted = true) { + for (int i = 0; i < num_keys; i++) { + data.push_back(i); + } + if (!sorted) { + std::random_shuffle(data.begin(), data.end()); + } + } +}; + +#endif \ No newline at end of file diff --git a/tests/test_alex/CMakeLists.txt b/tests/test_alex/CMakeLists.txt new file mode 100644 index 0000000..023bcd0 --- /dev/null +++ b/tests/test_alex/CMakeLists.txt @@ -0,0 +1,10 @@ +get_filename_component(EXEC ${CMAKE_CURRENT_SOURCE_DIR} NAME) +file(GLOB_RECURSE CPP_TESTS "*_tests.cpp") +add_executable(${EXEC} ${CPP_TESTS}) +target_link_libraries(${EXEC} PRIVATE +bliss +bliss_test_infra +GTest::gtest_main) + +include(GoogleTest) +gtest_discover_tests(${EXEC}) \ No newline at end of file diff --git a/tests/test_alex/alex_tests.cpp b/tests/test_alex/alex_tests.cpp new file mode 100644 index 0000000..c7e3fc6 --- /dev/null +++ b/tests/test_alex/alex_tests.cpp @@ -0,0 +1,30 @@ +#include "bliss_index_tests.h" + +class AlexTest : public BlissIndexTest {}; +TEST_F(AlexTest, TestAlex_Sorted) { + index.reset(new bliss::BlissAlexIndex()); + std::vector data; + GenerateData(data, num_keys); + + auto insert_start = data.begin(); + auto insert_end = data.end(); + executor::execute_inserts(*index, insert_start, insert_end); + + for (auto key : data) { + EXPECT_TRUE(index->get(key)); + } +} + +TEST_F(AlexTest, TestAlex_Random) { + index.reset(new bliss::BlissAlexIndex()); + std::vector data; + GenerateData(data, num_keys, 
false); + + auto insert_start = data.begin(); + auto insert_end = data.end(); + executor::execute_inserts(*index, insert_start, insert_end); + + for (auto key : data) { + EXPECT_TRUE(index->get(key)); + } +} \ No newline at end of file diff --git a/tests/test_btree/CMakeLists.txt b/tests/test_btree/CMakeLists.txt new file mode 100644 index 0000000..b44a9ac --- /dev/null +++ b/tests/test_btree/CMakeLists.txt @@ -0,0 +1,9 @@ +get_filename_component(EXEC ${CMAKE_CURRENT_SOURCE_DIR} NAME) +file(GLOB_RECURSE CPP_TESTS "*_tests.cpp") +add_executable(${EXEC} ${CPP_TESTS}) +target_link_libraries(${EXEC} PRIVATE +bliss +bliss_test_infra +GTest::gtest_main) +include(GoogleTest) +gtest_discover_tests(${EXEC}) \ No newline at end of file diff --git a/tests/test_btree/btree_tests.cpp b/tests/test_btree/btree_tests.cpp new file mode 100644 index 0000000..88787c9 --- /dev/null +++ b/tests/test_btree/btree_tests.cpp @@ -0,0 +1,31 @@ +#include "bliss_index_tests.h" + +class BTreeTest : public BlissIndexTest {}; + +TEST_F(BTreeTest, TestBTree_Sorted) { + index.reset(new bliss::BlissBTreeIndex()); + std::vector data; + GenerateData(data, num_keys); + + auto insert_start = data.begin(); + auto insert_end = data.end(); + executor::execute_inserts(*index, insert_start, insert_end); + + for (auto key : data) { + EXPECT_TRUE(index->get(key)); + } +} + +TEST_F(BTreeTest, TestBTree_Random) { + index.reset(new bliss::BlissBTreeIndex()); + std::vector data; + GenerateData(data, num_keys, false); + + auto insert_start = data.begin(); + auto insert_end = data.end(); + executor::execute_inserts(*index, insert_start, insert_end); + + for (auto key : data) { + EXPECT_TRUE(index->get(key)); + } +} \ No newline at end of file diff --git a/tests/test_lipp/CMakeLists.txt b/tests/test_lipp/CMakeLists.txt new file mode 100644 index 0000000..b44a9ac --- /dev/null +++ b/tests/test_lipp/CMakeLists.txt @@ -0,0 +1,9 @@ +get_filename_component(EXEC ${CMAKE_CURRENT_SOURCE_DIR} NAME) +file(GLOB_RECURSE 
CPP_TESTS "*_tests.cpp") +add_executable(${EXEC} ${CPP_TESTS}) +target_link_libraries(${EXEC} PRIVATE +bliss +bliss_test_infra +GTest::gtest_main) +include(GoogleTest) +gtest_discover_tests(${EXEC}) \ No newline at end of file diff --git a/tests/test_lipp/lipp_tests.cpp b/tests/test_lipp/lipp_tests.cpp new file mode 100644 index 0000000..78750a8 --- /dev/null +++ b/tests/test_lipp/lipp_tests.cpp @@ -0,0 +1,31 @@ +#include "bliss_index_tests.h" + +class LippTest : public BlissIndexTest {}; + +TEST_F(LippTest, TestLipp_Sorted) { + index.reset(new bliss::BlissLippIndex()); + std::vector data; + GenerateData(data, num_keys); + + auto insert_start = data.begin(); + auto insert_end = data.end(); + executor::execute_inserts(*index, insert_start, insert_end); + + for (auto key : data) { + EXPECT_TRUE(index->get(key)); + } +} + +TEST_F(LippTest, TestLipp_Random) { + index.reset(new bliss::BlissAlexIndex()); + std::vector data; + GenerateData(data, num_keys, false); + + auto insert_start = data.begin(); + auto insert_end = data.end(); + executor::execute_inserts(*index, insert_start, insert_end); + + for (auto key : data) { + EXPECT_TRUE(index->get(key)); + } +} \ No newline at end of file From 45a59adb41a070a95067bd7c1c3bd21136e509f5 Mon Sep 17 00:00:00 2001 From: Aneesh Raman Date: Thu, 26 Sep 2024 15:15:11 -0400 Subject: [PATCH 07/11] [Feature] Add Github actions workflow (#35) * Create cmake-single-platform.yml for Github actions workflow * Update cmake-single-platform.yml * added a verifier to check if unit tests exist * minor edit * minor edit --- .github/workflows/cmake-single-platform.yml | 48 +++++++++++++++++++++ tests/unit_test_exists.sh | 31 +++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 .github/workflows/cmake-single-platform.yml create mode 100755 tests/unit_test_exists.sh diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml new file mode 100644 index 0000000..017f85c --- /dev/null +++ 
b/.github/workflows/cmake-single-platform.yml @@ -0,0 +1,48 @@ +# This starter workflow is for a CMake project running on a single platform. There is a different starter workflow if you need cross-platform coverage. +# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-multi-platform.yml +name: Basic Tests for BLISS Benchmark + +on: + push: + branches: + - "main" + pull_request: + branches: [ "main" ] + +env: + # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) + BUILD_TYPE: Release + +jobs: + build: + # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. + # You can convert this to a matrix build if you need cross-platform coverage. + # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + submodules: 'true' + token: ${{ secrets.ACTIONS_ACCESS_TOKEN }} + + - name: Configure CMake + # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. + # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + + - name: Build + # Build your program with the given configuration + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + + - name: Verify Tests Exist + + working-directory: ${{github.workspace}}/tests + run: sh unit_test_exists.sh + + - name: Test + working-directory: ${{github.workspace}}/build + # Execute tests defined by the CMake configuration. 
+ # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail + run: ctest -C ${{env.BUILD_TYPE}} + diff --git a/tests/unit_test_exists.sh b/tests/unit_test_exists.sh new file mode 100755 index 0000000..67125ac --- /dev/null +++ b/tests/unit_test_exists.sh @@ -0,0 +1,31 @@ + +# boolean flag to check if test passed +test_passed=true + +# loop through all files in src/bliss directory excluding subdirectories +for file in $(find ../src/bliss -type f -name "bench_*.h"); do + # get the filename without the path + filename=$(basename $file) + # get the filename without the extension + filename_no_ext="${filename%.*}" + # get index name from filename which is of format bench_.h + index=$(echo $filename_no_ext | cut -d'_' -f2) + + # check if the test folder exists + if [ ! -d "test_$index" ]; then + echo "tests/test_$index/ directory does not exist" + test_passed=false + break + else + # check if test folder contains a CMakeLists.txt or _tests.cpp file + if [ ! -f "test_$index/CMakeLists.txt" ] && [ ! 
-f "test_$index/${index}_tests.cpp" ]; then + echo "tests/test_$index/ directory does not contain CMakeLists.txt or ${index}_tests.cpp file" + test_passed=false + break + fi + fi +done + +if [ "$test_passed" = true ]; then + echo "unit tests exist for all indexes" +fi \ No newline at end of file From aa3780eb4b8318f319395e5ce5faf987b2247495 Mon Sep 17 00:00:00 2001 From: Lucas Date: Thu, 12 Sep 2024 14:54:27 -0400 Subject: [PATCH 08/11] add radix tree benchmark --- CMakeLists.txt | 1 + src/bliss/bench_rax.h | 47 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 src/bliss/bench_rax.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e262c69..a9a5e17 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,6 +50,7 @@ add_library(bliss OBJECT ${CMAKE_SOURCE_DIR}/src/bliss/bliss_index.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_lipp.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_alex.h + ${CMAKE_SOURCE_DIR}/src/bliss/bench_rax.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_btree.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_art.h ) diff --git a/src/bliss/bench_rax.h b/src/bliss/bench_rax.h new file mode 100644 index 0000000..83416b0 --- /dev/null +++ b/src/bliss/bench_rax.h @@ -0,0 +1,47 @@ +#ifndef BLISS_BENCH_RAX +#define BLISS_BENCH_RAX + +#include + +#include "bliss/bliss_index.h" + +#include "rax.h" + + +namespace bliss { + +template +class BlissRaxIndex : public BlissIndex { + public: + rax* _index; + BlissRaxIndex() { + _index = raxNew(); + }; + + void bulkload( + std::vector> values) override { + // expects the pairs to be pre-sorted before performing bulk load + for (const auto& pair : values) { + put(pair.first, pair.second); + } + } + + bool get(KEY_TYPE key) override { + std::string keyStr = std::to_string(key); + void* result = raxFind(_index, (unsigned char*)keyStr.c_str(), keyStr.size()); + return (result == raxNotFound) ? 
false : true; + } + + void put(KEY_TYPE key, VALUE_TYPE value) override { + std::string keyStr = std::to_string(key); + raxInsert(_index, (unsigned char*)keyStr.c_str(), keyStr.size(), (void*)new VALUE_TYPE(value), NULL); + } + + void end_routine() override { + raxFree(_index); + } +}; + +} // namespace bliss + +#endif // !BLISS_BENCH_RAX From 2d37791d644f72ec386dcb7765f37d27a1320dcf Mon Sep 17 00:00:00 2001 From: Lucas Date: Mon, 23 Sep 2024 01:17:49 -0400 Subject: [PATCH 09/11] adding ART --- CMakeLists.txt | 1 - script/infra/pybliss.py | 1 + src/bliss/bench_rax.h | 47 ----------------------------------------- 3 files changed, 1 insertion(+), 48 deletions(-) delete mode 100644 src/bliss/bench_rax.h diff --git a/CMakeLists.txt b/CMakeLists.txt index a9a5e17..e262c69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,6 @@ add_library(bliss OBJECT ${CMAKE_SOURCE_DIR}/src/bliss/bliss_index.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_lipp.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_alex.h - ${CMAKE_SOURCE_DIR}/src/bliss/bench_rax.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_btree.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_art.h ) diff --git a/script/infra/pybliss.py b/script/infra/pybliss.py index 9cc4c53..f2c3802 100644 --- a/script/infra/pybliss.py +++ b/script/infra/pybliss.py @@ -68,6 +68,7 @@ def run_single_bliss_bench(self, args: BlissArgs) -> BlissStats: f"--file_type {'binary' if args.file_type else 'txt'}", "--use_preload" if args.use_preload else "", ] + print("this is cmd: " , " ".join(cmd)) process = subprocess.Popen( " ".join(cmd), stdout=subprocess.PIPE, diff --git a/src/bliss/bench_rax.h b/src/bliss/bench_rax.h deleted file mode 100644 index 83416b0..0000000 --- a/src/bliss/bench_rax.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef BLISS_BENCH_RAX -#define BLISS_BENCH_RAX - -#include - -#include "bliss/bliss_index.h" - -#include "rax.h" - - -namespace bliss { - -template -class BlissRaxIndex : public BlissIndex { - public: - rax* _index; - BlissRaxIndex() { - _index = 
raxNew(); - }; - - void bulkload( - std::vector> values) override { - // expects the pairs to be pre-sorted before performing bulk load - for (const auto& pair : values) { - put(pair.first, pair.second); - } - } - - bool get(KEY_TYPE key) override { - std::string keyStr = std::to_string(key); - void* result = raxFind(_index, (unsigned char*)keyStr.c_str(), keyStr.size()); - return (result == raxNotFound) ? false : true; - } - - void put(KEY_TYPE key, VALUE_TYPE value) override { - std::string keyStr = std::to_string(key); - raxInsert(_index, (unsigned char*)keyStr.c_str(), keyStr.size(), (void*)new VALUE_TYPE(value), NULL); - } - - void end_routine() override { - raxFree(_index); - } -}; - -} // namespace bliss - -#endif // !BLISS_BENCH_RAX From 494be09d09a36bd35f15979138f50f3d84c65ace Mon Sep 17 00:00:00 2001 From: Lucas Date: Mon, 23 Sep 2024 22:46:08 -0400 Subject: [PATCH 10/11] reflecting comments --- script/infra/pybliss.py | 1 - src/bliss/bench_ART.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/script/infra/pybliss.py b/script/infra/pybliss.py index f2c3802..9cc4c53 100644 --- a/script/infra/pybliss.py +++ b/script/infra/pybliss.py @@ -68,7 +68,6 @@ def run_single_bliss_bench(self, args: BlissArgs) -> BlissStats: f"--file_type {'binary' if args.file_type else 'txt'}", "--use_preload" if args.use_preload else "", ] - print("this is cmd: " , " ".join(cmd)) process = subprocess.Popen( " ".join(cmd), stdout=subprocess.PIPE, diff --git a/src/bliss/bench_ART.h b/src/bliss/bench_ART.h index 92db384..c5e2be9 100644 --- a/src/bliss/bench_ART.h +++ b/src/bliss/bench_ART.h @@ -1,6 +1,7 @@ #ifndef BLISS_BENCH_ART #define BLISS_BENCH_ART +#include "ART.h" #include "ART.h" #include From b138d9b9bdb25373cdfae2af5567bc51528a2764 Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 27 Sep 2024 00:13:10 -0400 Subject: [PATCH 11/11] art --- src/bliss/bench_ART.h | 5 +---- tests/test_art/CMakeLists.txt | 9 +++++++++ tests/test_art/art_tests.cpp | 31 
+++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 tests/test_art/CMakeLists.txt create mode 100644 tests/test_art/art_tests.cpp diff --git a/src/bliss/bench_ART.h b/src/bliss/bench_ART.h index c5e2be9..4c0c675 100644 --- a/src/bliss/bench_ART.h +++ b/src/bliss/bench_ART.h @@ -1,7 +1,6 @@ #ifndef BLISS_BENCH_ART #define BLISS_BENCH_ART -#include "ART.h" #include "ART.h" #include @@ -41,9 +40,7 @@ class BlissARTIndex : public BlissIndex { void put(KEY_TYPE key, VALUE_TYPE value) override { uint8_t ARTkey[KEY_SIZE]; ART::loadKey(key, ARTkey); - - uint8_t depth = 0; - ART::insert(_index, &_index, ARTkey, depth, value, VALUE_SIZE); + ART::insert(_index, &_index, ARTkey, 0, key, 8); } void end_routine() override {} diff --git a/tests/test_art/CMakeLists.txt b/tests/test_art/CMakeLists.txt new file mode 100644 index 0000000..b44a9ac --- /dev/null +++ b/tests/test_art/CMakeLists.txt @@ -0,0 +1,9 @@ +get_filename_component(EXEC ${CMAKE_CURRENT_SOURCE_DIR} NAME) +file(GLOB_RECURSE CPP_TESTS "*_tests.cpp") +add_executable(${EXEC} ${CPP_TESTS}) +target_link_libraries(${EXEC} PRIVATE +bliss +bliss_test_infra +GTest::gtest_main) +include(GoogleTest) +gtest_discover_tests(${EXEC}) \ No newline at end of file diff --git a/tests/test_art/art_tests.cpp b/tests/test_art/art_tests.cpp new file mode 100644 index 0000000..9bcefea --- /dev/null +++ b/tests/test_art/art_tests.cpp @@ -0,0 +1,31 @@ +#include "bliss_index_tests.h" + +class BTreeTest : public BlissIndexTest {}; + +TEST_F(BTreeTest, TestBTree_Sorted) { + index.reset(new bliss::BlissARTIndex()); + std::vector data; + GenerateData(data, num_keys); + + auto insert_start = data.begin(); + auto insert_end = data.end(); + executor::execute_inserts(*index, insert_start, insert_end); + + for (auto key : data) { + EXPECT_TRUE(index->get(key)); + } +} + +TEST_F(BTreeTest, TestBTree_Random) { + index.reset(new bliss::BlissARTIndex()); + std::vector data; + GenerateData(data, num_keys, 
false); + + auto insert_start = data.begin(); + auto insert_end = data.end(); + executor::execute_inserts(*index, insert_start, insert_end); + + for (auto key : data) { + EXPECT_TRUE(index->get(key)); + } +} \ No newline at end of file