From 169bc6dd7b26fea0cb077c3be7c251126a86df63 Mon Sep 17 00:00:00 2001 From: tyb0807 Date: Fri, 3 Jan 2025 16:14:25 +0100 Subject: [PATCH] Encode value 1 by 1 to avoid allocating mem for posting lists (#267) --- rs/index/src/ivf/writer.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/rs/index/src/ivf/writer.rs b/rs/index/src/ivf/writer.rs index df808db..9885462 100644 --- a/rs/index/src/ivf/writer.rs +++ b/rs/index/src/ivf/writer.rs @@ -183,19 +183,15 @@ impl IvfWriter { metadata_bytes_written += wrap_write(&mut metadata_writer, &num_posting_lists.to_le_bytes())?; for i in 0..num_posting_lists { - // TODO(tyb): we need to materialize the posting list here since we are - // not sure the whole list is on the same page. Optimize this in a separate PR - let posting_list = ivf_builder - .posting_lists() - .get(i as u32)? - .iter() - .collect::<Vec<_>>(); + let posting_list = ivf_builder.posting_lists().get(i as u32)?; let mut encoder = C::new_encoder( - *posting_list.last().unwrap_or(&0) as usize, - posting_list.len(), + posting_list.last().unwrap_or(0) as usize, + posting_list.elem_count, ); // Encode to get the length of the encoded data - encoder.encode_batch(&posting_list)?; + for val in posting_list.iter() { + encoder.encode_value(&val)?; + } // Write the length of the encoded posting list metadata_bytes_written += wrap_write(&mut metadata_writer, &encoder.len().to_le_bytes())?;