From 94dbfb775901fde0d006f73f22c39e8c61b8c505 Mon Sep 17 00:00:00 2001 From: Chang Guo Date: Fri, 26 Apr 2024 09:57:03 -0700 Subject: [PATCH] solution 1.1 cut and fetch multiple times --- .gitignore | 5 + source/adios2/engine/bp5/BP5Reader.cpp | 57 ++++-- source/adios2/toolkit/cache/KVCacheCommon.h | 28 +-- source/adios2/toolkit/cache/KVCacheCommon.inl | 66 ++++-- source/adios2/toolkit/cache/QueryBox.h | 188 ++++++++---------- 5 files changed, 174 insertions(+), 170 deletions(-) diff --git a/.gitignore b/.gitignore index 81ff18108..be42b134f 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,8 @@ docs/pyvenv.cfg # Visual Studio .vs/ CMakeSettings.json + +# gc +.idea/ +build-python/ +cmake-build-debug/ diff --git a/source/adios2/engine/bp5/BP5Reader.cpp b/source/adios2/engine/bp5/BP5Reader.cpp index 99347cbb0..7e610277a 100644 --- a/source/adios2/engine/bp5/BP5Reader.cpp +++ b/source/adios2/engine/bp5/BP5Reader.cpp @@ -293,13 +293,13 @@ void BP5Reader::PerformRemoteGets() for (auto &Req : GetRequests) { const DataType varType = m_IO.InquireVariableType(Req.VarName); - size_t numOfElements = m_KVCacheCommon.size(Req.Count); + QueryBox targetBox(Req.Start, Req.Count); + size_t numOfElements = targetBox.size(); + std::string keyPrefix = m_KVCacheCommon.keyPrefix(Req.VarName, Req.RelStep, Req.BlockID); + std::string cacheKey = m_KVCacheCommon.keyComposition(keyPrefix, Req.Start, Req.Count); if (getenv("useKVCache")) { - std::string cacheKey; - m_KVCacheCommon.keyComposition(Req.VarName, Req.RelStep, Req.BlockID, Req.Start, - Req.Count, cacheKey); if (m_KVCacheCommon.exists(cacheKey)) { @@ -315,17 +315,47 @@ void BP5Reader::PerformRemoteGets() #undef declare_type_get continue; } else { - bool fullContained = false; - std::cout << "cacheKey: " << cacheKey << std::endl; - QueryBox targetBox(Req.Start, Req.Count); - + int max_depth = 1; std::set samePrefixKeys; - m_KVCacheCommon.keyPrefixExistence(Req.VarName, Req.RelStep, Req.BlockID, samePrefixKeys); + 
m_KVCacheCommon.keyPrefixExistence(keyPrefix, samePrefixKeys); + std::vector regularBoxes; + std::vector cachedBoxes; + std::vector cachedKeys; + m_KVCacheCommon.getMaxInteractBox(samePrefixKeys, targetBox, max_depth, 0, regularBoxes, cachedBoxes, cachedKeys); + +#define declare_type_full_contain(T) \ + if (varType == helper::GetDataType()) \ + { \ + const int typeSize = sizeof(T); \ + std::vector reqData; \ + reqData.resize(numOfElements); \ + for (auto &box : regularBoxes){ \ + std::string boxKey = m_KVCacheCommon.keyComposition(keyPrefix, box.start, box.count); \ + std::vector srcData; \ + srcData.resize(box.size()); \ + m_Remote.Get(Req.VarName, Req.RelStep, Req.BlockID, box.count, box.start, srcData.data()); \ + helper::NdCopy(reinterpret_cast(srcData.data()), helper::CoreDims(box.start), box.count, true, false, reinterpret_cast(reqData.data()), Req.Start, Req.Count, true, false, typeSize); \ + m_KVCacheCommon.set(boxKey, srcData); \ + } \ + for (int i = 0; i < cachedBoxes.size(); i++){ \ + std::string boxKey = cachedKeys[i]; \ + QueryBox box(boxKey); \ + std::vector srcData; \ + srcData.resize(box.size()); \ + m_KVCacheCommon.get(boxKey, srcData); \ + helper::NdCopy(reinterpret_cast(srcData.data()), helper::CoreDims(cachedBoxes[i].start), cachedBoxes[i].count, true, false, reinterpret_cast(reqData.data()), Req.Start, Req.Count, true, false, typeSize); \ + } \ + std::memcpy(Req.Data, reqData.data(), numOfElements * sizeof(T)); \ + } +ADIOS2_FOREACH_PRIMITIVE_STDTYPE_1ARG(declare_type_full_contain) +#undef declare_type_full_contain + +/* + bool fullContained = false; for (auto &key : samePrefixKeys) { std::cout << "key: " << key << std::endl; - QueryBox box; - m_KVCacheCommon.extractStartCount(key, box.start, box.count); + QueryBox box(key); if (targetBox.isFullContainedBy(box)) { const size_t boxNumOfElements = box.size(); @@ -353,15 +383,14 @@ void BP5Reader::PerformRemoteGets() { continue; } +*/ + } } m_Remote.Get(Req.VarName, Req.RelStep, Req.BlockID, 
Req.Count, Req.Start, Req.Data); if (getenv("useKVCache")) { - std::string cacheKey; - m_KVCacheCommon.keyComposition(Req.VarName, Req.RelStep, Req.BlockID, Req.Start, - Req.Count, cacheKey); #define declare_type_set(T) \ if (varType == helper::GetDataType()) \ diff --git a/source/adios2/toolkit/cache/KVCacheCommon.h b/source/adios2/toolkit/cache/KVCacheCommon.h index 736ced5cb..6dba08c08 100644 --- a/source/adios2/toolkit/cache/KVCacheCommon.h +++ b/source/adios2/toolkit/cache/KVCacheCommon.h @@ -41,33 +41,13 @@ class KVCacheCommon inline bool exists(std::string key); - inline void keyComposition(char *VarName, size_t AbsStep, size_t BlockID, Dims Start, Dims Count, std::string &cacheKey); + inline std::string keyPrefix(char *VarName, size_t AbsStep, size_t BlockID); - inline void keyPrefixExistence(char *VarName, size_t AbsStep, size_t BlockID, std::set &keys); + inline std::string keyComposition(const std::string &key_prefix, Dims Start, Dims Count); - inline void extractStartCount(const std::string &key, Dims &Start, Dims &Count); + inline void keyPrefixExistence(const std::string &key_prefix, std::set &keys); -// template -// void serializeVector(const std::vector& vec, std::string& serializedString) { -// nlohmann::json j = vec; -// serializedString = j.dump(); -// } -// -// template -// void deserializeVector(const std::string& str, std::vector& vec) { -// nlohmann::json j = nlohmann::json::parse(str); -// vec = j.get>(); -// } - - size_t size(Dims Count) const - { - size_t size = 1; - for(auto i: Count) - { - size *= i; - } - return size; - } + inline void getMaxInteractBox(const std::set &samePrefixKeys, const QueryBox &queryBox, const size_t &max_depth, size_t current_depth, std::vector &regularBoxes, std::vector &cachedBoxes, std::vector &cachedKeys); inline std::string base64Encode(const std::vector& data); diff --git a/source/adios2/toolkit/cache/KVCacheCommon.inl b/source/adios2/toolkit/cache/KVCacheCommon.inl index 9ceb76296..16c067a8d 100644 --- 
a/source/adios2/toolkit/cache/KVCacheCommon.inl +++ b/source/adios2/toolkit/cache/KVCacheCommon.inl @@ -113,11 +113,15 @@ bool KVCacheCommon::exists(std::string key) } } -void KVCacheCommon::keyComposition(char *VarName, size_t AbsStep, size_t BlockID, Dims Start, Dims Count, std::string &cacheKey) +std::string KVCacheCommon::keyPrefix(char *VarName, size_t AbsStep, size_t BlockID) +{ + return VarName + std::to_string(AbsStep) + std::to_string(BlockID); +} + +std::string KVCacheCommon::keyComposition(const std::string &key_prefix, Dims Start, Dims Count) { - std::string key = VarName + std::to_string(AbsStep) + std::to_string(BlockID); std::string box = QueryBox::serializeQueryBox(QueryBox{Start, Count}); - cacheKey = key + box; + std::string cacheKey = key_prefix + box; // replace special characters std::replace(cacheKey.begin(), cacheKey.end(), '"', '_'); std::replace(cacheKey.begin(), cacheKey.end(), ',', '_'); @@ -127,12 +131,12 @@ void KVCacheCommon::keyComposition(char *VarName, size_t AbsStep, size_t BlockID std::replace(cacheKey.begin(), cacheKey.end(), ']', '_'); std::replace(cacheKey.begin(), cacheKey.end(), '{', '_'); std::replace(cacheKey.begin(), cacheKey.end(), '}', '_'); + return cacheKey; } -void KVCacheCommon::keyPrefixExistence(char *VarName, size_t AbsStep, size_t BlockID, std::set &keys) +void KVCacheCommon::keyPrefixExistence(const std::string &key_prefix, std::set &keys) { - std::string key = VarName + std::to_string(AbsStep) + std::to_string(BlockID); - std::string keyPattern = key + "*"; + std::string keyPattern = key_prefix + "*"; m_command = "KEYS " + keyPattern; m_redisReply = (redisReply *)redisCommand(m_redisContext, m_command.c_str()); if (m_redisReply == NULL) @@ -149,25 +153,43 @@ void KVCacheCommon::keyPrefixExistence(char *VarName, size_t AbsStep, size_t Blo } } -void KVCacheCommon::extractStartCount(const std::string &key, Dims &Start, Dims &Count) +void KVCacheCommon::getMaxInteractBox(const std::set &samePrefixKeys, const 
QueryBox &queryBox, const size_t &max_depth, size_t current_depth, std::vector &regularBoxes, std::vector &cachedBox, std::vector &cachedKeys) { - // sample key: "U3218446744073709551615__count_:_64_64_64___start_:_0_0_0__", count [64, 64, 64], start [0, 0, 0] - // using Dims = std::vector; - auto lf_ExtractDimensions = [](const std::string &key, const std::string &delimiter) -> Dims { - size_t pos = key.find(delimiter); - size_t end = key.find("__", pos + delimiter.length()); - std::string dimStr = key.substr(pos + delimiter.length(), end - pos - delimiter.length()); - Dims dimensions; - std::istringstream dimStream(dimStr); - std::string token; - while (std::getline(dimStream, token, '_')) { - dimensions.push_back(std::stoul(token)); + if (current_depth > max_depth) + { + return; + } + current_depth++; + QueryBox maxInteractBox; + std::string maxInteractKey; + for (auto &key : samePrefixKeys) + { + QueryBox cachedBox(key); + QueryBox intersection; + if (queryBox.isInteracted(cachedBox, intersection)) + { + if (maxInteractBox.size() < intersection.size()) + { + maxInteractBox = intersection; + maxInteractKey = key; + } } - return dimensions; - }; + } + + cachedBox.push_back(maxInteractBox); + cachedKeys.push_back(maxInteractKey); - Start = lf_ExtractDimensions(key, "__start_:_"); - Count = lf_ExtractDimensions(key, "__count_:_"); + if (current_depth == max_depth) + { + maxInteractBox.interactionCut(queryBox, regularBoxes); + } else { + std::vector nextBoxes; + maxInteractBox.interactionCut(queryBox, nextBoxes); + for (auto &box : nextBoxes) + { + getMaxInteractBox(samePrefixKeys, box, max_depth, current_depth, regularBoxes, cachedBox, cachedKeys); + } + } } std::string KVCacheCommon::base64Encode(const std::vector &data) diff --git a/source/adios2/toolkit/cache/QueryBox.h b/source/adios2/toolkit/cache/QueryBox.h index 5bf6a68a0..6e42d1292 100644 --- a/source/adios2/toolkit/cache/QueryBox.h +++ b/source/adios2/toolkit/cache/QueryBox.h @@ -22,6 +22,25 @@ class QueryBox 
// constructor QueryBox() = default; QueryBox(const adios2::Dims &start, const adios2::Dims &count) : start(start), count(count){}; + QueryBox(const std::string &key){ + // sample key: "U3218446744073709551615__count_:_64_64_64___start_:_0_0_0__", count [64, 64, 64], start [0, 0, 0] + // using Dims = std::vector; + auto lf_ExtractDimensions = [](const std::string &key, const std::string &delimiter) -> Dims { + size_t pos = key.find(delimiter); + size_t end = key.find("__", pos + delimiter.length()); + std::string dimStr = key.substr(pos + delimiter.length(), end - pos - delimiter.length()); + Dims dimensions; + std::istringstream dimStream(dimStr); + std::string token; + while (std::getline(dimStream, token, '_')) { + dimensions.push_back(std::stoul(token)); + } + return dimensions; + }; + + this->start = lf_ExtractDimensions(key, "__start_:_"); + this->count = lf_ExtractDimensions(key, "__count_:_"); + } // size size_t size() const @@ -53,8 +72,14 @@ class QueryBox return box; } + // determine if a query box is equal to another query box + bool operator==(const QueryBox &box) const + { + return start == box.start && count == box.count; + } + // determine if a query box is interacted in another query box, return intersection part as a new query box - bool isInteracted(const QueryBox &box, QueryBox &intersection) + bool isInteracted (const QueryBox &box, QueryBox &intersection) const { if (start.size() != box.start.size() || start.size() != count.size() || start.size() != box.count.size()) @@ -94,134 +119,77 @@ class QueryBox } return true; } - - /* - template - void copyContainedData(const QueryBox& cacheBox, std::vector& srcData, std::vector& dstData) { - size_t dim = this->start.size(); - - // find the continuous dimensions - size_t nContDim = 1; - while (nContDim <= dim - 1 && - this->count[dim - nContDim] == cacheBox.count[dim - nContDim] && - this->start[dim - nContDim] == cacheBox.start[dim - nContDim]) - { - ++nContDim; - } - // Note: 1 <= nContDim <= 
dimensions - size_t blockSize = 1; - size_t inOvlpSize = 1; - for (size_t i = 1; i <= nContDim; ++i) - { - blockSize *= (this->count[dim - i]); - inOvlpSize *= (cacheBox.count[dim - i]); - } - - // find the base of the intersection part - std::vector inOvlpCount(dim, 0); - inOvlpCount[dim - 1] = 1; - for (size_t i = dim - 2; i >= 0; i--) - { - inOvlpCount[i] = this->count[i + 1] * inOvlpCount[i + 1]; - } - - size_t inOvlpBase = 0; - for (size_t i = 0; i < dim; i++) - { - inOvlpBase += inOvlpCount[i] * (intersection.start[i] - cacheBox.start[i]); - } - - // const cacheBox size - const size_t cacheBoxSize = cacheBox.size(); - for (size_t i = 0; i * blockSize >= this->size(); i++){ - // copy data from intersection part to data - std::memcpy(dstData.data() + i * blockSize, srcData.data() + inOvlpBase, blockSize * sizeof(T)); - inOvlpBase += inOvlpSize; - } - - // copy data from srcData to dstData - - } - // if a query box is interacted with one of previous query boxes, return the remaining part as a set of query boxes intersection is inside outer - std::set getRemaining2D(const QueryBox &outer, const QueryBox &intersection) + // cut a query box from another interaction box, return a list of regular box + // remainingBox is the big one, this is small one + void interactionCut(const QueryBox &remainingBox, std::vector &regularBoxes) { - std::set remaining; - bool isRowMajor = true; - // copy outer box - QueryBox outerCopy; - for (size_t i = 0; i < outer.start.size(); i++) + if (remainingBox == *this) { - outerCopy.start.push_back(outer.start[i]); - outerCopy.count.push_back(outer.count[i]); + return; } - if (isRowMajor) + // find the max cut dimension + size_t maxCutDimSize = 0; + QueryBox maxCutDimBox; + for (size_t i = 0; i < start.size(); ++i) { - bool cutting = true; - while (cutting) + if (start[i] == remainingBox.start[i] && count[i] == remainingBox.count[i]) { - // if fully matched, no remaining - if (outerCopy.start == intersection.start && outerCopy.count == 
intersection.count) - { - cutting = false; - break; + continue; + } + else { + if (start[i] != remainingBox.start[i]){ + size_t cutDimDiff = start[i] - remainingBox.start[i]; + size_t cutDimSize = remainingBox.size() / remainingBox.count[i] * cutDimDiff; + if (cutDimSize > maxCutDimSize) + { + maxCutDimSize = cutDimSize; + maxCutDimBox = QueryBox(remainingBox.start, remainingBox.count); + maxCutDimBox.count[i] = cutDimDiff; + } } - QueryBox box; - box.start = outerCopy.start; - box.count = outerCopy.count; - // if not fully matched, check each dimension - // first, cut from tail of the first dimension - if (outerCopy.start[0] == intersection.start[0] and - outerCopy.count[0] != intersection.count[0]) - { - box.start[0] = intersection.start[0] + intersection.count[0]; - box.count[0] = outerCopy.count[0] - intersection.count[0]; - remaining.insert(box); - - outerCopy.count[0] = intersection.count[0]; - continue; - } - - // second, cut from head of the first dimension - if (outerCopy.start[0] != intersection.start[0]) - { - box.count[0] = intersection.start[0] - outerCopy.start[0]; - remaining.insert(box); - outerCopy.count[0] = - outerCopy.start[0] + outerCopy.count[0] - intersection.start[0]; - outerCopy.start[0] = intersection.start[0]; - continue; + if (start[i] + count[i] != remainingBox.start[i] + remainingBox.count[i]){ + size_t cutDimDiff = remainingBox.start[i] + remainingBox.count[i] - start[i] - count[i]; + size_t cutDimSize = remainingBox.size() / count[i] * cutDimDiff; + if (cutDimSize > maxCutDimSize) + { + maxCutDimSize = cutDimSize; + maxCutDimBox = QueryBox(remainingBox.start, remainingBox.count); + maxCutDimBox.start[i] = start[i] + count[i]; + maxCutDimBox.count[i] = cutDimDiff; + } } + } + } - // third, cut from tail of the second dimension - if (outerCopy.start[1] == intersection.start[1] and - outerCopy.count[1] != intersection.count[1]) + // cut the max cut dimension + if (maxCutDimSize > 0) + { + regularBoxes.push_back(maxCutDimBox); + QueryBox 
remainingBox1 = QueryBox(remainingBox.start, remainingBox.count); + for (size_t i = 0; i < remainingBox.start.size(); ++i) + { + if (maxCutDimBox.start[i] == remainingBox.start[i] && maxCutDimBox.count[i] == remainingBox.count[i]) { - box.start[1] = intersection.start[1] + intersection.count[1]; - box.count[1] = outerCopy.count[1] - intersection.count[1]; - remaining.insert(box); - - outerCopy.count[1] = intersection.count[1]; continue; } + else { + if (maxCutDimBox.start[i] != remainingBox.start[i]) + { + remainingBox1.count[i] = maxCutDimBox.start[i] - remainingBox.start[i]; + } else { + remainingBox1.start[i] = maxCutDimBox.start[i] + maxCutDimBox.count[i]; + remainingBox1.count[i] = remainingBox.start[i] + remainingBox.count[i] - remainingBox1.start[i]; + } - // fourth, cut from head of the second dimension - if (outerCopy.start[1] != intersection.start[1]) - { - box.count[1] = intersection.start[1] - outerCopy.start[1]; - remaining.insert(box); - outerCopy.count[1] = - outerCopy.start[1] + outerCopy.count[1] - intersection.start[1]; - outerCopy.start[1] = intersection.start[1]; - continue; } } + interactionCut(remainingBox1, regularBoxes); } - return remaining; } - */ + }; }; #endif // UNITTEST_QUERYBOX_H