Skip to content

Commit

Permalink
Add test for EF encoder in IvfWriter (#266)
Browse files Browse the repository at this point in the history
  • Loading branch information
tyb0807 authored Jan 3, 2025
1 parent 0e33ac3 commit 49d0361
Showing 1 changed file with 85 additions and 0 deletions.
85 changes: 85 additions & 0 deletions rs/index/src/ivf/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ mod tests {
use std::path::Path;

use byteorder::{LittleEndian, ReadBytesExt};
use compression::elias_fano::ef::EliasFano;
use compression::noc::noc::PlainEncoder;
use quantization::noq::noq::NoQuantizer;
use quantization::pq::pq::ProductQuantizer;
Expand Down Expand Up @@ -536,6 +537,90 @@ mod tests {
}
}

#[test]
fn test_write_posting_lists_and_metadata() {
    // Exercises `IvfWriter::write_posting_lists_and_metadata` with the
    // `EliasFano` encoder: one posting list is added to the builder, written
    // out, and both output files are compared byte-for-byte with the
    // expected layout.
    let temp_dir = TempDir::new("test_write_posting_lists_and_metadata").unwrap();
    let base_directory = temp_dir
        .path()
        .to_str()
        .expect("Failed to convert temporary directory path to string")
        .to_string();

    let num_features = 3;
    let ivf_writer =
        IvfWriter::<_, EliasFano>::new(base_directory.clone(), NoQuantizer::new(num_features));

    let builder_config = IvfBuilderConfig {
        max_iteration: 1000,
        batch_size: 4,
        num_clusters: 1,
        num_data_points_for_clustering: 2,
        max_clusters_per_vector: 1,
        distance_threshold: 0.1,
        base_directory: base_directory.clone(),
        memory_size: 1024,
        file_size: 4096,
        num_features,
        tolerance: 0.0,
        max_posting_list_size: usize::MAX,
    };
    let mut ivf_builder = IvfBuilder::new(builder_config).expect("Failed to create builder");

    ivf_builder
        .add_posting_list(&vec![5, 8, 8, 15, 32])
        .expect("Posting list should be added");

    let bytes_written = ivf_writer
        .write_posting_lists_and_metadata(&mut ivf_builder)
        .expect("Failed to write posting lists and metadata");

    // Reads a whole file under `base_directory` into memory. The panic
    // messages are passed in so each file keeps its own diagnostics.
    let read_output = |file_name: &str, open_msg: &str, read_msg: &str| -> Vec<u8> {
        let mut bytes = Vec::new();
        File::open(format!("{}/{}", base_directory, file_name))
            .expect(open_msg)
            .read_to_end(&mut bytes)
            .expect(read_msg);
        bytes
    };

    // Load the metadata file
    let metadata_content = read_output(
        "posting_list_metadata",
        "Failed to open metadata file",
        "Failed to read metadata file",
    );

    // Load the posting lists file
    let posting_lists_content = read_output(
        "posting_lists",
        "Failed to open posting lists file",
        "Failed to read posting lists file",
    );

    // The reported byte count must cover both files together.
    assert_eq!(
        bytes_written,
        metadata_content.len() + posting_lists_content.len()
    );

    // Metadata file: three 8-byte fields.
    let expected_metadata: Vec<u8> = vec![
        1, 0, 0, 0, 0, 0, 0, 0, // num_posting_lists
        5, 0, 0, 0, 0, 0, 0, 0, // posting_list0_len
        0, 0, 0, 0, 0, 0, 0, 0, // posting_list0_offset
    ];
    assert_eq!(metadata_content, expected_metadata);
    assert_eq!(metadata_content.len(), 8 * 3);

    // Posting list file: three 8-byte header fields followed by the two
    // encoded bit arrays, each padded to 8 bytes.
    // NOTE(review): the bytes are consistent with 2 low bits per value
    // (1, 0, 0, 3, 0 for 5, 8, 8, 15, 32) - confirm against the encoder.
    let expected_posting_lists: Vec<u8> = vec![
        2, 0, 0, 0, 0, 0, 0, 0, // lower_bit_length
        1, 0, 0, 0, 0, 0, 0, 0, // number of u64 for encoding lower_bits
        1, 0, 0, 0, 0, 0, 0, 0, // number of u64 for encoding upper_bits
        0b11000001, 0, 0, 0, 0, 0, 0, 0, // lower_bits + padding
        0b01011010, 0b00010000, 0, 0, 0, 0, 0, 0, // upper_bits + padding
    ];
    assert_eq!(posting_lists_content, expected_posting_lists);
    assert_eq!(posting_lists_content.len(), 8 * 5);
}

#[test]
fn test_ivf_writer_write() {
let temp_dir =
Expand Down

0 comments on commit 49d0361

Please sign in to comment.