Skip to content

Commit

Permalink
Encode value 1 by 1 to avoid allocating mem for posting lists (#267)
Browse files — browse the repository at this point in the history
  • Loading branch information
tyb0807 authored Jan 3, 2025
1 parent f43f8bd commit 169bc6d
Showing 1 changed file with 6 additions and 10 deletions.
16 changes: 6 additions & 10 deletions rs/index/src/ivf/writer.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -183,19 +183,15 @@ impl<Q: Quantizer, C: IntSeqEncoder + 'static> IvfWriter<Q, C> {
 metadata_bytes_written +=
 wrap_write(&mut metadata_writer, &num_posting_lists.to_le_bytes())?;
 for i in 0..num_posting_lists {
-// TODO(tyb): we need to materialize the posting list here since we are
-// not sure the whole list is on the same page. Optimize this in a separate PR
-let posting_list = ivf_builder
-.posting_lists()
-.get(i as u32)?
-.iter()
-.collect::<Vec<_>>();
+let posting_list = ivf_builder.posting_lists().get(i as u32)?;
 let mut encoder = C::new_encoder(
-*posting_list.last().unwrap_or(&0) as usize,
-posting_list.len(),
+posting_list.last().unwrap_or(0) as usize,
+posting_list.elem_count,
 );
 // Encode to get the length of the encoded data
-encoder.encode_batch(&posting_list)?;
+for val in posting_list.iter() {
+encoder.encode_value(&val)?;
+}
 // Write the length of the encoded posting list
 metadata_bytes_written +=
 wrap_write(&mut metadata_writer, &encoder.len().to_le_bytes())?;
Expand Down

0 comments on commit 169bc6d

Please sign in to comment.