Skip to content

Commit

Permalink
Write posting list size instead of posting list numbers (#257)
Browse files Browse the repository at this point in the history
  • Loading branch information
tyb0807 authored Jan 2, 2025
1 parent 9ff16eb commit 31c7367
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 13 deletions.
2 changes: 1 addition & 1 deletion rs/demo/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "demo"
version = "0.1.0"
edition = "2024"
edition = "2021"

[dependencies]
tonic.workspace = true
Expand Down
5 changes: 3 additions & 2 deletions rs/index/src/ivf/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,10 +285,10 @@ mod tests {
// Posting list offset starts at 0 (see FileBackedAppendablePostingListStorage)
let mut pl_offset = 0;
for posting_list in posting_lists.iter() {
let pl_len = posting_list.len();
let pl_len = posting_list.len() * size_of::<u64>();
assert!(file.write_all(&(pl_len as u64).to_le_bytes()).is_ok());
assert!(file.write_all(&(pl_offset as u64).to_le_bytes()).is_ok());
pl_offset += pl_len * size_of::<u64>();
pl_offset += pl_len;
offset += 2 * size_of::<u64>();
}
for posting_list in posting_lists.iter() {
Expand Down Expand Up @@ -341,6 +341,7 @@ mod tests {
assert_eq!(ivf.num_clusters, num_clusters);
let cluster_0 = ivf.index_storage.get_posting_list(0);
let cluster_1 = ivf.index_storage.get_posting_list(1);
println!("{:?} {:?}", cluster_0, cluster_1);
assert!(cluster_0.map_or(false, |list| list.contains(&0)));
assert!(cluster_1.map_or(false, |list| list.contains(&2)));
}
Expand Down
4 changes: 2 additions & 2 deletions rs/index/src/posting_list/combined_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ impl FixedIndexFile {
..metadata_offset + PL_METADATA_LEN * size_of::<u64>()];
let pl_offset = u64::from_le_bytes(slice.try_into()?) as usize + posting_list_start_offset;

let slice = &self.mmap[pl_offset..pl_offset + pl_len * size_of::<u64>()];
let slice = &self.mmap[pl_offset..pl_offset + pl_len];
Ok(transmute_u8_to_slice::<u64>(slice))
}

Expand Down Expand Up @@ -218,7 +218,7 @@ mod tests {

let posting_lists: Vec<Vec<u64>> = vec![vec![1, 2, 3, 4], vec![5, 6, 7, 8, 9, 10]];
// Posting list offset starts at 0 (see FileBackedAppendablePostingListStorage)
let metadata: Vec<u64> = vec![4, 0, 6, 32];
let metadata: Vec<u64> = vec![4 * 8, 0, 6 * 8, 32];
assert!(file.write_all(&num_clusters).is_ok());
assert!(file.write_all(transmute_slice_to_u8(&metadata)).is_ok());
assert!(file
Expand Down
20 changes: 13 additions & 7 deletions rs/index/src/posting_list/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,9 @@ impl FileBackedAppendablePostingListStorage {
self.metadata_backing_files.new_backing_file()?;
}
// Write the size of the posting list in bytes (number of entries * size_of::<u64>())
self.metadata_backing_files
.write_to_current_mmap(&posting_list.len().to_le_bytes())?;
self.metadata_backing_files.write_to_current_mmap(
&((posting_list.len() * size_of::<u64>()) as u64).to_le_bytes(),
)?;
// Write the offset to the current posting list
self.metadata_backing_files.write_to_current_mmap(
&self
Expand Down Expand Up @@ -337,11 +338,9 @@ impl<'a> PostingListStorage<'a> for FileBackedAppendablePostingListStorage {
.next()
.ok_or(anyhow!("Expected a single slice but got none"))?;

let pl_len = u64::from_le_bytes(metadata_slice[..u64_bytes].try_into()?) as usize;
let required_size = u64::from_le_bytes(metadata_slice[..u64_bytes].try_into()?) as usize;
let pl_offset = u64::from_le_bytes(metadata_slice[u64_bytes..].try_into()?) as usize;

let required_size = pl_len * u64_bytes;

Ok(PostingList::new_with_slices(
self.posting_list_backing_files
.get_slices_at(pl_offset, required_size)?,
Expand Down Expand Up @@ -508,7 +507,7 @@ mod tests {
// Read length
let length_bytes: [u8; 8] = mmap[0..u64_bytes].try_into().unwrap();
let length = u64::from_le_bytes(length_bytes);
assert_eq!(length, pl1.len() as u64);
assert_eq!(length, (pl1.len() * u64_bytes) as u64);

// Read offset
let offset_bytes: [u8; 8] = mmap[u64_bytes..metadata_size].try_into().unwrap();
Expand Down Expand Up @@ -751,7 +750,14 @@ mod tests {
let length = u64::from_le_bytes(length_bytes);
let offset = u64::from_le_bytes(offset_bytes);

assert_eq!(length as usize, if i == 0 { pl1.len() } else { pl2.len() });
assert_eq!(
length as usize,
if i == 0 {
pl1.len() * size_of::<u64>()
} else {
pl2.len() * size_of::<u64>()
}
);
assert_eq!(
offset as usize,
if i == 0 {
Expand Down
2 changes: 1 addition & 1 deletion rs/index/src/posting_list/fixed_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ impl FixedFilePostingListStorage {
[metadata_offset + size_in_bytes..metadata_offset + PL_METADATA_LEN * size_in_bytes];
let pl_offset = u64::from_le_bytes(slice.try_into()?) as usize + pl_start_offset;

let slice = &self.mmap[pl_offset..pl_offset + pl_len * std::mem::size_of::<u64>()];
let slice = &self.mmap[pl_offset..pl_offset + pl_len];
Ok(transmute_u8_to_slice::<u64>(slice))
}
}
Expand Down

0 comments on commit 31c7367

Please sign in to comment.