Skip to content

Commit

Permalink
Init inverted index reader and writer interfaces (#412)
Browse files Browse the repository at this point in the history
  • Loading branch information
yingfeng authored Jan 3, 2024
1 parent 9b6c6a0 commit 3c67f2f
Show file tree
Hide file tree
Showing 19 changed files with 502 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/common/memory/memory_pool.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ protected:

export class RecyclePool : public MemoryPool {
public:
RecyclePool(SizeT chunkSize, SizeT alignSize = 1);
RecyclePool(SizeT chunk_size = DEFAULT_CHUNK_SIZE, SizeT align_size = DEFAULT_ALIGN_SIZE);

~RecyclePool() {}

Expand Down
49 changes: 49 additions & 0 deletions src/storage/invertedindex/column_indexer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
module;

import stl;
import memory_pool;
import segment_posting;
import index_segment_reader;
import posting_iterator;
import index_defines;
import index_config;
import index_segment_reader;
import posting_writer;
import data_block;
import parser;
import column_vector;
module column_indexer;

namespace infinity {

/// Builds an indexer for a single column.
///
/// @param column_id       Identifier stored in column_id_.
/// @param index_config    Inverted index settings; copied into index_config_.
/// @param byte_slice_pool Pool whose raw pointer is handed to each PostingWriter (see DoAddPosting).
/// @param buffer_pool     Pool that supplies PostingWriter storage and is also handed to each PostingWriter.
ColumnIndexer::ColumnIndexer(u64 column_id,
                             const InvertedIndexConfig &index_config,
                             SharedPtr<MemoryPool> byte_slice_pool,
                             SharedPtr<RecyclePool> buffer_pool)
    : column_id_(column_id), index_config_(index_config), byte_slice_pool_(byte_slice_pool), buffer_pool_(buffer_pool),
      // The term -> PostingWriter table starts out empty and is heap allocated.
      posting_table_(new PostingTable) {}

/// Tears down every PostingWriter stored in the posting table, returns its storage to
/// buffer_pool_, and then frees the table itself.
ColumnIndexer::~ColumnIndexer() {
    if (posting_table_ == nullptr) {
        return;
    }
    for (auto &entry : *posting_table_) {
        PostingWriter *posting_writer = entry.second;
        // Writers are placement-new'ed into buffer_pool_ storage (see DoAddPosting), so the
        // destructor is invoked by hand and the raw bytes go back to the pool rather than
        // through operator delete.
        posting_writer->~PostingWriter();
        buffer_pool_->Deallocate(static_cast<void *>(posting_writer), sizeof(PostingWriter));
    }
    // The table itself comes from a plain `new` in the constructor; previously it was only
    // cleared, which leaked the container object.
    delete posting_table_;
    posting_table_ = nullptr;
}

/// Placeholder: currently performs no work with the supplied column vector or row ids.
void ColumnIndexer::Add(SharedPtr<ColumnVector> column_vector, Vector<RowID> &row_ids) {
    // Intentionally empty.
}

void ColumnIndexer::DoAddPosting(const String &term) {
PostingTable::iterator it = posting_table_->find(term);
if (it == posting_table_->end()) {
PostingWriter *posting_writer = new (buffer_pool_->Allocate(sizeof(PostingWriter)))
PostingWriter(byte_slice_pool_.get(), buffer_pool_.get(), index_config_.GetPostingFormatOption());
posting_table_->emplace(term, posting_writer);
}
}

} // namespace infinity
37 changes: 37 additions & 0 deletions src/storage/invertedindex/column_indexer.cppm
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
module;

import stl;
import memory_pool;
import segment_posting;
import index_segment_reader;
import posting_iterator;
import index_defines;
import index_config;
import index_segment_reader;
import posting_writer;
import data_block;
import parser;
import column_vector;
export module column_indexer;

namespace infinity {
/// Per-column indexer that keeps a table mapping each term to a PostingWriter.
export class ColumnIndexer {
    // Term -> writer. The writers are raw pointers released in the destructor.
    using PostingTable = HashMap<String, PostingWriter *>;

public:
    /// @param column_id       Identifier of the indexed column.
    /// @param index_config    Inverted index settings (stored by value).
    /// @param byte_slice_pool Memory pool shared with the posting writers.
    /// @param buffer_pool     Recycle pool shared with the posting writers.
    ColumnIndexer(u64 column_id, const InvertedIndexConfig &index_config, SharedPtr<MemoryPool> byte_slice_pool, SharedPtr<RecyclePool> buffer_pool);
    ~ColumnIndexer();

    // The destructor destroys every writer reachable through posting_table_. An implicit copy
    // would share that raw pointer and destroy the same writers twice, so copying is disabled
    // (which also suppresses the implicit move operations).
    ColumnIndexer(const ColumnIndexer &) = delete;
    ColumnIndexer &operator=(const ColumnIndexer &) = delete;

    /// Feeds a column vector and its row ids to the indexer.
    void Add(SharedPtr<ColumnVector> column_vector, Vector<RowID> &row_ids);

private:
    /// Creates the PostingWriter for `term` if the table does not have one yet.
    void DoAddPosting(const String &term);

private:
    u64 column_id_;
    InvertedIndexConfig index_config_;
    SharedPtr<MemoryPool> byte_slice_pool_;
    SharedPtr<RecyclePool> buffer_pool_;
    PostingTable *posting_table_{nullptr};
};
} // namespace infinity
14 changes: 14 additions & 0 deletions src/storage/invertedindex/disk_segment_reader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
module;

import stl;
import memory_pool;
import segment_posting;
import index_defines;
import index_segment_reader;
import index_config;
import segment;
module disk_index_segment_reader;

namespace infinity {
/// Records the root path; the segment argument is not used yet.
DiskIndexSegmentReader::DiskIndexSegmentReader(const String &root_path, const Segment &segment)
    : path_(root_path) {
}
} // namespace infinity
26 changes: 26 additions & 0 deletions src/storage/invertedindex/disk_segment_reader.cppm
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
module;

import stl;
import memory_pool;
import segment_posting;
import index_defines;
import index_segment_reader;
import index_config;
import segment;
export module disk_index_segment_reader;

namespace infinity {
export class DiskIndexSegmentReader : public IndexSegmentReader {
public:
DiskIndexSegmentReader(const String &root_path, const Segment &segment);
virtual ~DiskIndexSegmentReader() = default;

bool GetSegmentPosting(const String &term, docid_t base_doc_id, SegmentPosting &seg_posting, MemoryPool *session_pool) const override {
return false;
}

private:
String path_;
};

} // namespace infinity
17 changes: 4 additions & 13 deletions src/storage/invertedindex/format/posting_list_format.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import stl;
import doc_list_format_option;
import pos_list_format_option;
import posting_value;
import index_config;
import index_defines;
export module posting_list_format;

Expand All @@ -15,9 +14,10 @@ public:
inline PostingFormatOption(optionflag_t flag = OPTION_FLAG_ALL) : has_term_payload_(false) { InitOptionFlag(flag); }
~PostingFormatOption() = default;

inline void Init(const SharedPtr<InvertedIndexConfig> &index_config) {
InitOptionFlag(index_config->GetOptionFlag());
doc_list_format_option_.SetShortListVbyteCompress(index_config->IsShortListVbyteCompress());
inline void InitOptionFlag(optionflag_t flag) {
has_term_payload_ = flag & of_term_payload;
doc_list_format_option_.Init(flag);
pos_list_format_option_.Init(flag);
}

bool HasTfBitmap() const { return doc_list_format_option_.HasTfBitmap(); }
Expand All @@ -43,16 +43,7 @@ public:
bool operator==(const PostingFormatOption &right) const;

bool IsOnlyTermPayLoad() const { return HasTermPayload() && !HasTfBitmap() && !HasPositionList(); }

PostingFormatOption GetBitmapPostingFormatOption() const;

private:
inline void InitOptionFlag(optionflag_t flag) {
has_term_payload_ = flag & of_term_payload;
doc_list_format_option_.Init(flag);
pos_list_format_option_.Init(flag);
}

bool has_term_payload_;
DocListFormatOption doc_list_format_option_;
PositionListFormatOption pos_list_format_option_;
Expand Down
26 changes: 24 additions & 2 deletions src/storage/invertedindex/index_config.cppm
Original file line number Diff line number Diff line change
@@ -1,19 +1,41 @@
module;

import stl;
import index_defines;
import posting_list_format;
export module index_config;

namespace infinity {
/// Settings for one inverted index: name, analyzer, option flags, and the posting format
/// derived from those flags.
export class InvertedIndexConfig {
public:
    /// Stores the option flag and re-derives the posting format from it.
    void SetOptionFlag(optionflag_t flag) {
        flag_ = flag;
        posting_format_option_.InitOptionFlag(flag_);
    }

    optionflag_t GetOptionFlag() const { return flag_; }

    /// Stores the short-list vbyte setting and forwards it to the posting format.
    void SetShortListVbyteCompress(bool is_short_list_vbyte_compress) {
        is_short_list_vbyte_compress_ = is_short_list_vbyte_compress;
        posting_format_option_.SetShortListVbyteCompress(is_short_list_vbyte_compress_);
    }

    bool IsShortListVbyteCompress() const { return is_short_list_vbyte_compress_; }

    /// Returns a copy of the posting format kept in sync by the two setters above.
    PostingFormatOption GetPostingFormatOption() const { return posting_format_option_; }

    void SetIndexName(const String &index_name) { index_name_ = index_name; }
    String GetIndexName() const { return index_name_; }

    void SetAnalyzer(const String &analyzer) { analyzer_ = analyzer; }
    String GetAnalyzer() const { return analyzer_; }

private:
    String index_name_;
    PostingFormatOption posting_format_option_;
    // PostingFormatOption default-constructs from OPTION_FLAG_ALL, so start with the same flag
    // instead of leaving this member indeterminate until SetOptionFlag is called.
    optionflag_t flag_{OPTION_FLAG_ALL};
    // NOTE(review): false is chosen only to give the getter a defined value before the setter
    // runs; confirm it matches the default held inside PostingFormatOption.
    bool is_short_list_vbyte_compress_{false};
    String analyzer_;
};

} // namespace infinity
2 changes: 2 additions & 0 deletions src/storage/invertedindex/index_defines.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,14 @@ export {
typedef u32 tf_t;
typedef i64 ttf_t;
typedef u32 field_len_t;
typedef i32 segmentid_t;

constexpr optionflag_t OPTION_FLAG_ALL = of_term_payload | of_doc_payload | of_position_list | of_term_frequency;
constexpr optionflag_t NO_TERM_FREQUENCY = of_doc_payload | of_term_payload;
constexpr optionflag_t OPTION_FLAG_NONE = of_none;
constexpr docid_t INVALID_DOCID = -1;
constexpr pos_t INVALID_POSITION = u32_max;
constexpr segmentid_t INVALID_SEGMENTID = -1;
constexpr u32 MAX_DOC_PER_RECORD = 128;
constexpr u32 MAX_DOC_PER_RECORD_BIT_NUM = 7;
constexpr u32 MAX_DOC_PER_BITMAP_BLOCK = 256;
Expand Down
46 changes: 46 additions & 0 deletions src/storage/invertedindex/index_reader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
module;

#include <vector>

import stl;
import memory_pool;
import segment_posting;
import index_segment_reader;
import posting_iterator;
import index_defines;
import index_config;
import segment;
import disk_index_segment_reader;
import inmem_index_segment_reader;
import indexer;
module index_reader;

namespace infinity {
/// Opens the index described by `index_config`.
///
/// The index name is used as the root directory. Segments are listed through GetSegments and
/// one reader is created per segment: a disk reader for segments whose status is
/// Segment::BUILT, an in-memory reader for everything else.
void IndexReader::Open(const InvertedIndexConfig &index_config) {
    // GetIndexName() returns the name by value, so fetch it once and reuse the stored copy.
    root_dir_ = index_config.GetIndexName();

    Vector<Segment> segments;
    GetSegments(root_dir_, segments);

    Vector<SharedPtr<IndexSegmentReader>> segment_readers;
    for (auto &segment : segments) {
        if (segment.GetSegmentStatus() == Segment::BUILT) {
            segment_readers.push_back(CreateDiskSegmentReader(segment));
        } else {
            segment_readers.push_back(CreateInMemSegmentReader(segment));
        }
    }
    // NOTE(review): segment_readers is a local and is dropped on return; nothing in this class
    // keeps the readers yet, so Open currently has no lasting effect beyond setting root_dir_.
}

/// Placeholder: leaves `segments` untouched.
void IndexReader::GetSegments(const String &directory, Vector<Segment> &segments) {
    // Intentionally empty.
}

/// Creates a disk reader for `segment`, rooted at root_dir_.
SharedPtr<DiskIndexSegmentReader> IndexReader::CreateDiskSegmentReader(const Segment &segment) {
    SharedPtr<DiskIndexSegmentReader> disk_reader = MakeShared<DiskIndexSegmentReader>(root_dir_, segment);
    return disk_reader;
}

/// Asks the segment's index writer for an in-memory reader and returns it.
SharedPtr<IndexSegmentReader> IndexReader::CreateInMemSegmentReader(Segment &segment) {
    return segment.GetIndexWriter()->CreateInMemSegmentReader();
}

} // namespace infinity
38 changes: 38 additions & 0 deletions src/storage/invertedindex/index_reader.cppm
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
module;

import stl;
import memory_pool;
import segment_posting;
import index_segment_reader;
import posting_iterator;
import index_defines;
import index_config;
import segment;
import disk_index_segment_reader;
import inmem_index_segment_reader;
export module index_reader;

namespace infinity {
/// Entry point for reading an inverted index; creates per-segment readers in Open().
export class IndexReader {
public:
    IndexReader() = default;
    virtual ~IndexReader() = default;

    /// Opens the index described by `index_config`.
    void Open(const InvertedIndexConfig &index_config);

    /// Stub: always returns nullptr.
    PostingIterator *Lookup(const String &term) {
        return nullptr;
    }

    /// Fills `segments` with the segments found under `directory`.
    void GetSegments(const String &directory, Vector<Segment> &segments);

    /// Stub: always returns false and leaves `seg_posting` untouched.
    bool GetSegmentPosting(const String &term,
                           docid_t base_doc_id,
                           SegmentPosting &seg_posting,
                           MemoryPool *session_pool) {
        return false;
    }

private:
    /// Creates a reader backed by DiskIndexSegmentReader for `segment`.
    SharedPtr<DiskIndexSegmentReader> CreateDiskSegmentReader(const Segment &segment);

    /// Creates a reader through the index writer obtained from `segment`.
    SharedPtr<IndexSegmentReader> CreateInMemSegmentReader(Segment &segment);

private:
    String root_dir_;
};

} // namespace infinity
12 changes: 12 additions & 0 deletions src/storage/invertedindex/index_segment_reader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
module;

import stl;
import memory_pool;
import segment_posting;
import index_defines;
import segment;
module index_segment_reader;

namespace infinity {
/// Out-of-line default constructor; performs no initialization of its own.
IndexSegmentReader::IndexSegmentReader() = default;
} // namespace infinity
19 changes: 19 additions & 0 deletions src/storage/invertedindex/index_segment_reader.cppm
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
module;

import stl;
import memory_pool;
import segment_posting;
import index_defines;
import segment;
export module index_segment_reader;

namespace infinity {
/// Abstract interface for looking up a term's posting inside one index segment.
export class IndexSegmentReader {
public:
    IndexSegmentReader();

    virtual ~IndexSegmentReader() = default;

    /// Looks up `term` and, on success, describes its posting through `seg_posting`.
    ///
    /// @return Success or failure; the exact contract is left to implementations.
    virtual bool GetSegmentPosting(const String &term,
                                   docid_t base_doc_id,
                                   SegmentPosting &seg_posting,
                                   MemoryPool *session_pool) const = 0;
};

} // namespace infinity
Loading

0 comments on commit 3c67f2f

Please sign in to comment.