Skip to content

Commit

Permalink
Use 2 separate mmaps for metadata and posting lists
Browse files (browse the repository at this point in the history)
  • Loading branch information
BuildKite committed Dec 5, 2024
1 parent d548578 commit ca76a21
Show file tree
Hide file tree
Showing 5 changed files with 236 additions and 225 deletions.
2 changes: 0 additions & 2 deletions rs/index/src/ivf/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ impl IvfBuilder {
posting_lists_path,
config.memory_size,
config.file_size,
config.num_clusters,
));

Ok(Self {
Expand Down Expand Up @@ -218,7 +217,6 @@ impl IvfBuilder {
posting_list_storage_location,
self.config.memory_size,
self.config.file_size,
self.centroids.len(),
));

// Move ownership of each posting list to the posting list storage
Expand Down
3 changes: 2 additions & 1 deletion rs/index/src/ivf/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,8 @@ mod tests {
// Write posting lists
assert!(file.write_all(&(num_clusters as u64).to_le_bytes()).is_ok());
offset += size_of::<u64>();
let mut pl_offset = num_clusters * 2 * size_of::<u64>();
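// Posting list offsets are relative to the start of the posting list data (which follows the
// metadata table), so the first one is 0 (see FileBackedAppendablePostingListStorage)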
let mut pl_offset = 0;
for posting_list in posting_lists.iter() {
let pl_len = posting_list.len();
assert!(file.write_all(&(pl_len as u64).to_le_bytes()).is_ok());
Expand Down
10 changes: 7 additions & 3 deletions rs/index/src/posting_list/combined_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,16 @@ impl FixedIndexFile {

let metadata_offset =
self.posting_list_metadata_offset + index * PL_METADATA_LEN * size_of::<u64>();

let posting_list_start_offset = self.posting_list_metadata_offset
+ self.header.num_clusters as usize * PL_METADATA_LEN * size_of::<u64>();

let slice = &self.mmap[metadata_offset..metadata_offset + size_of::<u64>()];
let pl_len = u64::from_le_bytes(slice.try_into()?) as usize;

let slice = &self.mmap[metadata_offset + size_of::<u64>()
..metadata_offset + PL_METADATA_LEN * size_of::<u64>()];
let pl_offset =
u64::from_le_bytes(slice.try_into()?) as usize + self.posting_list_metadata_offset;
let pl_offset = u64::from_le_bytes(slice.try_into()?) as usize + posting_list_start_offset;

let slice = &self.mmap[pl_offset..pl_offset + pl_len * size_of::<u64>()];
Ok(transmute_u8_to_slice::<u64>(slice))
Expand Down Expand Up @@ -208,7 +211,8 @@ mod tests {
// No need for padding here

let posting_lists: Vec<Vec<u64>> = vec![vec![1, 2, 3, 4], vec![5, 6, 7, 8, 9, 10]];
let metadata: Vec<u64> = vec![4, 32, 6, 64];
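// Metadata is (length, offset) per posting list. Offsets are relative to the start of the
// posting list data, so the first list starts at 0 and the second at 4 * size_of::<u64>() = 32
// (see FileBackedAppendablePostingListStorage)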
let metadata: Vec<u64> = vec![4, 0, 6, 32];
assert!(file.write_all(&num_clusters).is_ok());
assert!(file.write_all(transmute_slice_to_u8(&metadata)).is_ok());
assert!(file
Expand Down
Loading

0 comments on commit ca76a21

Please sign in to comment.