From 26364a229e4ec6f0b04e70fc8696570478970844 Mon Sep 17 00:00:00 2001 From: Sebastian Hiebl Date: Sun, 5 Nov 2023 20:41:44 +0100 Subject: [PATCH] add lazy loading of subsequent chunks --- src/ChunkedJournal.hpp | 69 +++++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/src/ChunkedJournal.hpp b/src/ChunkedJournal.hpp index 72ec62c..c1cbaed 100644 --- a/src/ChunkedJournal.hpp +++ b/src/ChunkedJournal.hpp @@ -4,6 +4,7 @@ #include "SdJournal.hpp" #include "SdLine.hpp" +#include #include #include #include @@ -13,26 +14,35 @@ namespace jess { static constexpr auto uChunkSize = 1024; class ChunkedJournal { + enum class Contiguity { + CONTIGUOUS, + NON_CONTIGUOUS, + OVERLAPPING, + }; + + enum class InsertPosition { + BEFORE, + AFTER, + }; + struct Chunk { // we might need multiple cursors depending on the sequence id SdCursor beginning; SdCursor end; std::vector lines; + Contiguity contiguityBeginning{Contiguity::NON_CONTIGUOUS}; + Contiguity contiguityEnd{Contiguity::NON_CONTIGUOUS}; std::partial_ordering operator<=>(const Chunk &other) const { return beginning <=> other.beginning; } }; SdJournal m_journal{}; std::list m_chunks{}; - Chunk *m_pCurrentChunk{nullptr}; + decltype(m_chunks.begin()) m_pCurrentChunk{m_chunks.begin()}; size_t m_uLineOffsetInChunk{0}; - auto addChunk(Chunk chunk) -> decltype(m_chunks.begin()) { - auto it = std::upper_bound(m_chunks.begin(), m_chunks.end(), chunk); - return m_chunks.insert(it, std::move(chunk)); - } - - auto loadChunk(const SdCursor &cursor) -> decltype(m_chunks.begin()) { + auto createChunkFromCurrentPosition(const SdCursor &cursor, InsertPosition insertPos, Contiguity contiguity) + -> decltype(m_chunks.begin()) { Chunk newChunk{cursor, cursor, {}}; newChunk.lines.reserve(uChunkSize); @@ -46,35 +56,58 @@ class ChunkedJournal { newChunk.end = m_journal.getCursor(); - return addChunk(std::move(newChunk)); + auto insertIt = m_pCurrentChunk; + if (insertPos == InsertPosition::AFTER) { + std::advance(insertIt, 1); + } + auto newChunkIt = m_chunks.insert(insertIt, std::move(newChunk)); + + if (insertPos == InsertPosition::BEFORE) { + newChunkIt->contiguityEnd = contiguity; + if(m_pCurrentChunk != m_chunks.end()) { + m_pCurrentChunk->contiguityBeginning = contiguity; + } + } else if (insertPos == InsertPosition::AFTER) { + if(m_pCurrentChunk != m_chunks.end()) { + m_pCurrentChunk->contiguityEnd = contiguity; + } + newChunkIt->contiguityBeginning = contiguity; + } + + return newChunkIt; } public: - void seekToBof() { m_journal.seekToBof(); m_journal.next(); auto cursorBof = m_journal.getCursor(); if (m_chunks.empty() || m_chunks.front().beginning == cursorBof) { - m_pCurrentChunk = loadChunk(cursorBof).operator->(); + m_pCurrentChunk = createChunkFromCurrentPosition(cursorBof, InsertPosition::BEFORE, Contiguity::NON_CONTIGUOUS); } - m_pCurrentChunk = &m_chunks.front(); m_uLineOffsetInChunk = 0; } void seekLines(int64_t uNumLines) { - if(uNumLines < 0 && static_cast(std::abs(uNumLines)) > m_uLineOffsetInChunk) { + assert(m_pCurrentChunk != m_chunks.end()); + + if (uNumLines < 0 && static_cast(std::abs(uNumLines)) > m_uLineOffsetInChunk) { m_uLineOffsetInChunk = 0; - // todo: modulo logic? // todo: load previous chunk return; } - if(m_uLineOffsetInChunk + uNumLines > uChunkSize) { - m_uLineOffsetInChunk = 0; - // todo: modulo logic? - // todo: load next chunk + if (m_uLineOffsetInChunk + uNumLines > m_pCurrentChunk->lines.size()) { + int64_t uLinesAfterEndOfChunk = + uNumLines - static_cast(m_pCurrentChunk->lines.size() - m_uLineOffsetInChunk); + size_t uNumChunksToSkip = uLinesAfterEndOfChunk / uChunkSize; + m_uLineOffsetInChunk = uLinesAfterEndOfChunk - uNumChunksToSkip * uChunkSize; + m_journal.seekForward(uNumChunksToSkip * uChunkSize); + m_journal.next(); + auto cursorBeginningOfChunk = m_journal.getCursor(); + Contiguity contiguity = uNumChunksToSkip == 0 ? Contiguity::CONTIGUOUS : Contiguity::NON_CONTIGUOUS; + m_pCurrentChunk = createChunkFromCurrentPosition(cursorBeginningOfChunk, InsertPosition::AFTER, contiguity); return; } @@ -82,7 +115,7 @@ class ChunkedJournal { } std::span getLines(size_t uNumLines) { - if (m_pCurrentChunk == nullptr) { + if (m_pCurrentChunk == m_chunks.end()) { return {}; } std::span ret{m_pCurrentChunk->lines};