From 27ffdfd7ffd63fe63e67f1cf749f6865a727cc5c Mon Sep 17 00:00:00 2001 From: roy1994 Date: Thu, 11 Apr 2024 15:44:15 +0800 Subject: [PATCH] doc: update README --- README.md | 32 +++++-- src/backends/src/ServerCtl.cpp | 4 +- src/query/include/SearchResultBuilder.h | 1 + src/query/src/Queryer.cpp | 16 ++-- src/query/src/SearchResultBuilder.cpp | 4 + test/CMakeLists.txt | 11 +++ test/test_benchmark.cpp | 16 ++++ test/test_queryer.cpp | 7 +- test/test_skp_eff.cpp | 109 ++++++++++++++++++++++++ 9 files changed, 183 insertions(+), 17 deletions(-) create mode 100644 test/test_benchmark.cpp create mode 100644 test/test_skp_eff.cpp diff --git a/README.md b/README.md index 677dc50..e1fcbcf 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,31 @@ -## 参考资料 -1. [Lucene原理](https://www.infoq.cn/article/ejeg02vroegvalw4j_ll) +
+

OurSearchEngine

+

+ A simple search engine written in Cpp +

+
-## Requires +## Features +- + + +## Architecture +![Architecture](doc/pic/Arch.png) + + +## How to use +### Build ```bash -yay -S jsoncpp-cmake +cd OurSearchEngine git submodule update --init --recursive -``` \ No newline at end of file +cmake -B build +cmake --build build +``` + +### Use + + + +## Reference +1. [Lucene](https://www.infoq.cn/article/ejeg02vroegvalw4j_ll) diff --git a/src/backends/src/ServerCtl.cpp b/src/backends/src/ServerCtl.cpp index 729abde..fc88792 100644 --- a/src/backends/src/ServerCtl.cpp +++ b/src/backends/src/ServerCtl.cpp @@ -21,8 +21,8 @@ void ServerCtl::search(const drogon::HttpRequestPtr &req, ResponseCallback &&cal Json::StreamWriterBuilder builder; builder.settings_["emitUTF8"] = true; - spdlog::info("\n{}", Json::writeString(builder, ret)); - auto resp = drogon::HttpResponse::newHttpJsonResponse(ret); + // spdlog::info("\n{}", Json::writeString(builder, ret)); + auto resp = drogon::HttpResponse::newHttpJsonResponse(ret); // std::ifstream inputFile("../assets/test.json"); diff --git a/src/query/include/SearchResultBuilder.h b/src/query/include/SearchResultBuilder.h index 334d1ee..06dbd28 100644 --- a/src/query/include/SearchResultBuilder.h +++ b/src/query/include/SearchResultBuilder.h @@ -12,6 +12,7 @@ class SearchResultBuilder { void addItem(SearchResultItem &&item); void addItems(std::vector> &&items); void addPartsInfo(const std::map &parts); + void addPartsInfo(std::map &&parts); void addItemTotalNumber(uint64_t tot); Json::Value build(); diff --git a/src/query/src/Queryer.cpp b/src/query/src/Queryer.cpp index 27f230e..65d5b2f 100644 --- a/src/query/src/Queryer.cpp +++ b/src/query/src/Queryer.cpp @@ -17,9 +17,9 @@ using SG::Core::SkipList; Json::Value Queryer::get(const std::string &content, uint64_t rkBegin, uint64_t rkEnd) { SG::SearchResultBuilder ret; - SG::PartsInfo partsInfo = createPartsInfo(content); - ret.addPartsInfo(partsInfo); + SG::PartsInfo partsInfo = createPartsInfo(content); auto [resultList, totalCnt] = createResultList(partsInfo, rkBegin, rkEnd); + ret.addPartsInfo(std::move(partsInfo)); ret.addItems(std::move(resultList)); ret.addItemTotalNumber(totalCnt); return ret.build(); @@ -58,7 +58,7 @@ std::pair>, uint64_t> Queryer::cre uint64_t outputID = output / 400; std::ifstream inputFile("../assets/library/skl/" + std::to_string(outputID) + ".lib"); if (!inputFile.is_open()) { - spdlog::error("[Queryer::creatResultList] Failed to open json file"); + spdlog::error("[Queryer::creatResultList] Failed to open skl library file {}", std::to_string(outputID) + ".lib"); inputFile.close(); break; } @@ -78,11 +78,11 @@ std::pair>, uint64_t> Queryer::cre std::vector> combineResult = SkipList::combine(sls); - std::cout << combineResult.size() << std::endl; - for (auto &i : combineResult) { - std::cout << i.size() << std::endl; - } - std::cout << idfs.size() << std::endl; + // std::cout << combineResult.size() << std::endl; + // for (auto &i : combineResult) { + // std::cout << i.size() << std::endl; + // } + // std::cout << idfs.size() << std::endl; for (int i = 0; i < combineResult.size(); ++i) { double score = 0; diff --git a/src/query/src/SearchResultBuilder.cpp b/src/query/src/SearchResultBuilder.cpp index b48d170..4e8d97c 100644 --- a/src/query/src/SearchResultBuilder.cpp +++ b/src/query/src/SearchResultBuilder.cpp @@ -14,6 +14,10 @@ void SearchResultBuilder::addPartsInfo(const std::map &pa partsInfo = parts; } +void SearchResultBuilder::addPartsInfo(std::map &&parts) { + partsInfo = std::move(parts); +} + void SearchResultBuilder::addItem(SearchResultItem &&item) { results.push_back(std::make_unique(item)); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1d916f3..8fe82c5 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -32,6 +32,17 @@ target_link_libraries(test_queryer PUBLIC query ) +find_package(benchmark REQUIRED) + +add_executable(test_benchmark test_benchmark.cpp) +target_link_libraries(test_benchmark PUBLIC + query +) +target_link_libraries(test_benchmark PUBLIC + benchmark::benchmark +) + + if(UNIX AND CMAKE_BUILD_TYPE MATCHES "Debug") target_compile_options(test_skiplist PRIVATE -fno-omit-frame-pointer) target_compile_options(test_skiplist PRIVATE -fsanitize=address) diff --git a/test/test_benchmark.cpp b/test/test_benchmark.cpp new file mode 100644 index 0000000..2a78efb --- /dev/null +++ b/test/test_benchmark.cpp @@ -0,0 +1,16 @@ +#include "Queryer.h" +#include +#include +using namespace SG; + + +static void BM_Query(benchmark::State &state) { + Queryer &queryer = Queryer::getInstance(); + for (auto _ : state) { + auto ans = queryer.get("寿命", 0, 10); + benchmark::DoNotOptimize(ans); + } +} +BENCHMARK(BM_Query); + +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/test/test_queryer.cpp b/test/test_queryer.cpp index e565dc8..f8ff1ae 100644 --- a/test/test_queryer.cpp +++ b/test/test_queryer.cpp @@ -2,9 +2,12 @@ #include using namespace SG; int main() { - Queryer &queryer = Queryer::getInstance(); - auto ans = queryer.get("使用寿命", 0, 10); + Queryer &queryer = Queryer::getInstance(); + auto ans = queryer.get("使用寿命", 0, 10); + auto ans2 = queryer.get("使用寿命", 0, 10); + Json::StreamWriterBuilder builder; builder.settings_["emitUTF8"] = true; spdlog::info("\n{}", Json::writeString(builder, ans)); + spdlog::info("\n{}", Json::writeString(builder, ans2)); } \ No newline at end of file diff --git a/test/test_skp_eff.cpp b/test/test_skp_eff.cpp new file mode 100644 index 0000000..27728f0 --- /dev/null +++ b/test/test_skp_eff.cpp @@ -0,0 +1,109 @@ +#include "SkipList.h" +#include +#include +#include +#include +#include + +struct Item { + int id; + Item() = default; + Item(int id) + : id{id} {} + bool operator==(const Item &t) const { + return id == t.id; + } + bool operator<(const Item &t) const & { + return id < t.id; + } + bool operator<(const Item &t) const && { + return id < t.id; + } + + friend std::ostream &operator<<(std::ostream &os, const Item &t) { + os << '(' << t.id << ')'; + return os; + } + + static Json::Value toJson(const Item &t) { + Json::Value ret; + ret["id"] = t.id; + return ret; + } + static Item fromJson(const Json::Value &src) { + return {src["id"].asInt()}; + } +}; +using SG::Core::SkipList; + +void test(const int &size, std::ofstream &outputFile) { + srand(time(nullptr)); + + SkipList skipList; + + const int largeSize = size; + + // test insertion + std::vector insertData; + srand(time(nullptr)); + for (int i = 0; i < largeSize; i++) { + insertData.push_back(rand() % largeSize); + } + + auto start = std::chrono::steady_clock::now(); + for (int i = 0; i < insertData.size(); i++) { + skipList.insert(Item(insertData[i])); + } + auto end = std::chrono::steady_clock::now(); + auto duration = std::chrono::duration_cast(end - start); + outputFile << size << "," << duration.count(); + + // outputFile << "\n========\n" + // << skipList.dump().toStyledString() << "\n========\n"; + // std::cout << "=====\n"; + // test deletion + // std::vector removeData; + // srand(time(nullptr)); + // for (int i = 0; i < largeSize; i++) { + // removeData.push_back(rand() % largeSize); + // } + + // start = std::chrono::steady_clock::now(); + // for (int i = 0; i < removeData.size(); i++) { + // skipList.remove(removeData[i]); + // } + // end = std::chrono::steady_clock::now(); + // duration = std::chrono::duration_cast(end - start); + // outputFile << "," << duration.count(); + + // test search + std::vector searchData; + for (int i = 0; i < largeSize; i++) { + searchData.push_back(rand() % largeSize); + } + + start = std::chrono::steady_clock::now(); + for (int i = 0; i < searchData.size(); i++) { + auto l = skipList.search(Item(searchData[i])); + } + end = std::chrono::steady_clock::now(); + duration = std::chrono::duration_cast(end - start); + outputFile << "," << duration.count() << std::endl; + + // std::cout << "=====\n"; + + // skipList.print(); +} + +int main() { + std::ofstream outputFile("result.csv"); + outputFile << "DataSize,InsertionTime,SearchTime" << std::endl; + + for (int i = 100; i <= 10000000; i *= 10) { + test(i, outputFile); + } + + outputFile.close(); + + return 0; +}