From 73c2f170dbcf9939d86556c1b9cef2514ed33404 Mon Sep 17 00:00:00 2001 From: Richard Chapman Date: Wed, 23 Oct 2024 16:49:18 +0100 Subject: [PATCH] HPCC-32856 CKeyCursor should link parent nodes as well as current node Signed-off-by: Richard Chapman --- system/jhtree/jhtree.cpp | 114 ++++++++++++++++++++++++++++++++++++++- system/jhtree/jhtree.ipp | 4 ++ 2 files changed, 117 insertions(+), 1 deletion(-) diff --git a/system/jhtree/jhtree.cpp b/system/jhtree/jhtree.cpp index 762a4a440db..408b3b82397 100644 --- a/system/jhtree/jhtree.cpp +++ b/system/jhtree/jhtree.cpp @@ -1805,6 +1805,11 @@ CKeyCursor::CKeyCursor(const CKeyCursor &from) { nodeKey = from.nodeKey; node.set(from.node); + for (unsigned i = 0; i < maxParentNodes; i++) + { + parents[i].set(from.parents[i]); + parentNodeKeys[i] = from.parentNodeKeys[i]; + } unsigned keySize = key.keySize(); recordBuffer = (char *) malloc(keySize); // MORE - would be nice to know real max - is it stored in metadata? memcpy(recordBuffer, from.recordBuffer, key.keyedSize()); // Just make keyed portion valid initially @@ -1824,6 +1829,7 @@ void CKeyCursor::reset() { node.clear(); matched = false; + clearParentNodes(); eof = key.bloomFilterReject(*filter) || !filter->canMatch(); if (!eof) setLow(0); @@ -1840,6 +1846,7 @@ bool CKeyCursor::_next(IContextLogger *ctx) if (!node) { node.setown(key.locateFirstLeafNode(ctx)); + clearParentNodes(); // MORE - could do better, but may not be a lot of point? nodeKey = 0; return node && node->isKeyAt(nodeKey); } @@ -1857,9 +1864,33 @@ bool CKeyCursor::_next(IContextLogger *ctx) if (node != NULL) { nodeKey = 0; + // Update the nodekey indexes of any parent nodes that are still relevant, and clear any that are not + unsigned depth = key.getBranchDepth(); + while (depth) + { + depth--; + if (depth < maxParentNodes && parents[depth]) + { + unsigned numParentKeys = parents[depth]->getNumKeys(); + unsigned parentNodeKey = parentNodeKeys[depth]; + if (parentNodeKey < numParentKeys-1) + { + parentNodeKeys[depth] = parentNodeKey+1; + break; + } + parents[depth].clear(); + } + else + { + // We can't tell which parent node info is correct. Clear them all. + clearParentNodes(); + break; + } + } return node->isKeyAt(0); } } + clearParentNodes(); return false; } else @@ -1915,6 +1946,7 @@ bool CKeyCursor::_last(IContextLogger *ctx) { fullBufferValid = false; node.setown(key.locateLastLeafNode(ctx)); + clearParentNodes(); // Could in theory do better but unlikely to be any point if (node) { nodeKey = node->getNumKeys()-1; @@ -1940,12 +1972,43 @@ bool CKeyCursor::_gtEqual(IContextLogger *ctx) { int rc = node->compareValueAt(recordBuffer, ++nodeKey); if (rc <= 0) - return true; + { + return true; + } if (nodeKey < numKeys-1) { rc = node->compareValueAt(recordBuffer, numKeys-1); if (rc <= 0) + { lwm = nodeKey+1; + } + } + } + } + if (!lwm) + { + while (depth) + { + depth--; + if (depth < maxParentNodes) + { + const CJHSearchNode *startnode = parents[depth]; + if (startnode) + { + unsigned numParentKeys = startnode->getNumKeys(); + unsigned parentNodeKey = parentNodeKeys[depth]; + if (parentNodeKey < numParentKeys-1) + { + int rc = startnode->compareValueAt(recordBuffer, numParentKeys-1); + if (rc <= 0) + { + lwm = parentNodeKey; + node.set(startnode); + break; + } + } + parents[depth].clear(); + } } } } @@ -1978,6 +2041,12 @@ bool CKeyCursor::_gtEqual(IContextLogger *ctx) if (agetNumKeys()) { offset_t npos = node->getFPosAt(a); + if (depth < maxParentNodes) + { + parentNodeKeys[depth] = a; + parents[depth].swap(node); + } + lwm = 0; depth++; NodeType type = (depth < branchDepth) ? NodeBranch : NodeLeaf; node.setown(getCursorNode(npos, type, ctx)); @@ -2019,6 +2088,33 @@ bool CKeyCursor::_ltEqual(IContextLogger *ctx) } } if (!lwm) + { + while (depth) + { + depth--; + if (depth < maxParentNodes) + { + const CJHSearchNode *startnode = parents[depth]; + if (startnode) + { + unsigned numParentKeys = startnode->getNumKeys(); + unsigned parentNodeKey = parentNodeKeys[depth]; + if (parentNodeKey < numParentKeys-1) + { + int rc = startnode->compareValueAt(recordBuffer, numParentKeys-1); + if (rc < 0) + { + lwm = parentNodeKey; + node.set(startnode); + break; + } + } + parents[depth].clear(); + } + } + } + } + if (!lwm) { node.set(key.rootNode); depth = 0; @@ -2052,6 +2148,12 @@ bool CKeyCursor::_ltEqual(IContextLogger *ctx) if (a==node->getNumKeys()) a--; // value being looked for is off the end of the index. offset_t npos = node->getFPosAt(a); + if (depth < maxParentNodes) + { + parentNodeKeys[depth] = a; + parents[depth].swap(node); + } + lwm = 0; depth++; NodeType type = (depth < branchDepth) ? NodeBranch : NodeLeaf; node.setown(getCursorNode(npos, type, ctx)); @@ -2061,6 +2163,15 @@ bool CKeyCursor::_ltEqual(IContextLogger *ctx) } } +void CKeyCursor::clearParentNodes() +{ + for (unsigned i = 0; i < maxParentNodes; i++) + { + parents[i].clear(); + parentNodeKeys[i] = 0; + } +} + void CKeyCursor::serializeCursorPos(MemoryBuffer &mb) { mb.append(eof); @@ -2086,6 +2197,7 @@ void CKeyCursor::deserializeCursorPos(MemoryBuffer &mb, IContextLogger *ctx) { mb.read(eof); node.clear(); + clearParentNodes(); // we don't serialize parent node info if (!eof) { mb.read(matched); diff --git a/system/jhtree/jhtree.ipp b/system/jhtree/jhtree.ipp index 5c9e409bf70..b13c8e94a3a 100644 --- a/system/jhtree/jhtree.ipp +++ b/system/jhtree/jhtree.ipp @@ -201,10 +201,13 @@ public: class jhtree_decl CKeyCursor : public CInterfaceOf, implements INodeLoader { protected: + static constexpr unsigned maxParentNodes = 4; CKeyIndex &key; const IIndexFilterList *filter; char *recordBuffer = nullptr; Owned node; + Owned parents[maxParentNodes]; + unsigned int parentNodeKeys[maxParentNodes] = {0}; unsigned int nodeKey; mutable bool fullBufferValid = false; @@ -267,6 +270,7 @@ protected: // if _lookup returns true, recordBuffer will contain keyed portion of result bool _lookup(bool exact, unsigned lastSeg, bool unfiltered, IContextLogger *ctx); + void clearParentNodes(); void reportExcessiveSeeks(unsigned numSeeks, unsigned lastSeg, IContextLogger *ctx);