diff --git a/rs/index/src/hnsw/builder.rs b/rs/index/src/hnsw/builder.rs index 8d7786b6..207d6332 100644 --- a/rs/index/src/hnsw/builder.rs +++ b/rs/index/src/hnsw/builder.rs @@ -590,7 +590,7 @@ mod tests { vectors.append(&vec![2, 2, 2, 2, 2]).unwrap(); let mut builder = HnswBuilder { - vectors: vectors, + vectors, max_neighbors: 1, layers: vec![layer], current_top_layer: 0, diff --git a/rs/index/src/hnsw/index.rs b/rs/index/src/hnsw/index.rs index 40c4775d..5d0b5ae8 100644 --- a/rs/index/src/hnsw/index.rs +++ b/rs/index/src/hnsw/index.rs @@ -74,7 +74,6 @@ impl, Q: Quantizer> Hnsw { // let pq = pq_reader.read().unwrap(); let quantizer = Q::read(quantizer_directory).unwrap(); - let index_mmap = unsafe { Mmap::map(&backing_file).unwrap() }; Self { diff --git a/rs/index/src/hnsw/reader.rs b/rs/index/src/hnsw/reader.rs index b051dc0f..df67b602 100644 --- a/rs/index/src/hnsw/reader.rs +++ b/rs/index/src/hnsw/reader.rs @@ -99,6 +99,7 @@ impl HnswReader { mod tests { use std::fs; + use quantization::no_op::{NoQuantizer, NoQuantizerWriter}; use quantization::pq::{ProductQuantizer, ProductQuantizerConfig, ProductQuantizerWriter}; use quantization::pq_builder::{ProductQuantizerBuilder, ProductQuantizerBuilderConfig}; use utils::test_utils::generate_random_vector; @@ -165,4 +166,45 @@ mod tests { assert_eq!(49, hnsw.get_data_offset()); assert_eq!(16, hnsw.get_header().quantized_dimension); } + + #[test] + fn test_read_no_op_quantizer() { + let temp_dir = tempdir::TempDir::new("product_quantizer_test").unwrap(); + let base_directory = temp_dir.path().to_str().unwrap().to_string(); + let vector_dir = format!("{}/vectors", base_directory); + fs::create_dir_all(vector_dir.clone()).unwrap(); + let datapoints: Vec> = (0..10000).map(|_| generate_random_vector(128)).collect(); + + // quantizer + let quantizer = NoQuantizer::new(128); + let quantizer_dir = format!("{}/quantizer", base_directory); + fs::create_dir_all(quantizer_dir.clone()).unwrap(); + let quantizer_writer = 
NoQuantizerWriter::new(quantizer_dir); + quantizer_writer.write(&quantizer).unwrap(); + + let mut hnsw_builder = HnswBuilder::::new( + 10, 128, 20, 1024, 4096, 128, quantizer, vector_dir, + ); + for i in 0..datapoints.len() { + hnsw_builder.insert(i as u64, &datapoints[i]).unwrap(); + } + + let hnsw_dir = format!("{}/hnsw", base_directory); + fs::create_dir_all(hnsw_dir.clone()).unwrap(); + let writer = HnswWriter::new(hnsw_dir); + match writer.write(&mut hnsw_builder, false) { + Ok(()) => { + assert!(true); + } + Err(_) => { + assert!(false); + } + } + + // Read from file + let reader = HnswReader::new(base_directory.clone()); + let hnsw = reader.read::(); + assert_eq!(49, hnsw.get_data_offset()); + assert_eq!(128, hnsw.get_header().quantized_dimension); + } } diff --git a/rs/quantization/src/lib.rs b/rs/quantization/src/lib.rs index 3eb78b47..b637fb58 100644 --- a/rs/quantization/src/lib.rs +++ b/rs/quantization/src/lib.rs @@ -5,3 +5,5 @@ pub mod rabitq; pub mod rabitq_builder; pub mod quantization; pub mod typing; +pub mod no_op; +pub mod reader; diff --git a/rs/quantization/src/no_op.rs b/rs/quantization/src/no_op.rs new file mode 100644 index 00000000..436f39c7 --- /dev/null +++ b/rs/quantization/src/no_op.rs @@ -0,0 +1,94 @@ +use anyhow::Result; +use serde::{Deserialize, Serialize}; + +use crate::quantization::Quantizer; + +pub struct NoQuantizer { + dimension: usize, +} + +impl NoQuantizer { + pub fn new(dimension: usize) -> Self { + Self { dimension } + } +} + +impl Quantizer for NoQuantizer { + fn quantize(&self, _value: &[f32]) -> Vec { + // Throw an error if called + todo!() + } + + fn quantized_dimension(&self) -> usize { + self.dimension + } + + fn original_vector(&self, _quantized_vector: &[u8]) -> Vec { + // Throw an error if called + todo!() + } + + fn distance( + &self, + _query: &[u8], + _point: &[u8], + _implem: utils::distance::l2::L2DistanceCalculatorImpl, + ) -> f32 { + // Throw an error if called + todo!() + } + + fn read(dir: String) -> 
Result + where + Self: Sized, + { + let reader = NoQuantizerReader::new(dir); + reader.read() + } +} + +#[derive(Serialize, Deserialize)] +pub struct NoQuantizerConfig { + dimension: usize, +} + +pub struct NoQuantizerReader { + base_directory: String, +} + +impl NoQuantizerReader { + pub fn new(base_directory: String) -> Self { + Self { base_directory } + } + + pub fn read(&self) -> Result { + // Deserialize the config + let config = serde_yaml::from_str::(&std::fs::read_to_string( + &format!("{}/no_op_quantizer_config.yaml", self.base_directory), + )?)?; + Ok(NoQuantizer::new(config.dimension)) + } +} + +// Writer + +pub struct NoQuantizerWriter { + base_directory: String, +} + +impl NoQuantizerWriter { + pub fn new(base_directory: String) -> Self { + Self { base_directory } + } + + pub fn write(&self, quantizer: &NoQuantizer) -> Result<()> { + let config = NoQuantizerConfig { + dimension: quantizer.dimension, + }; + std::fs::write( + &format!("{}/no_op_quantizer_config.yaml", self.base_directory), + serde_yaml::to_string(&config)?, + )?; + Ok(()) + } +} diff --git a/rs/quantization/src/reader.rs b/rs/quantization/src/reader.rs new file mode 100644 index 00000000..91d79943 --- /dev/null +++ b/rs/quantization/src/reader.rs @@ -0,0 +1,42 @@ +use std::path::Path; + +use anyhow::Result; + +use crate::no_op::{NoQuantizer, NoQuantizerReader}; +use crate::pq::{ProductQuantizer, ProductQuantizerReader}; + +pub struct QuantizationReader { + base_directory: String, +} + +pub enum QuantizationType { + ProductQuantizer, + NoQuantization, +} + +impl QuantizationReader { + pub fn new(base_directory: String) -> Self { + Self { base_directory } + } + + pub fn get_quantization_type(&self) -> QuantizationType { + // If the file exists, then it is a product quantizer + if Path::new(&self.base_directory) + .join("product_quantizer_config.yaml") + .exists() + { + return QuantizationType::ProductQuantizer; + } + QuantizationType::NoQuantization + } + + pub fn 
read_product_quantizer(&self) -> Result { + let reader = ProductQuantizerReader::new(self.base_directory.clone()); + reader.read() + } + + pub fn read_no_quantization(&self) -> Result { + let reader = NoQuantizerReader::new(self.base_directory.clone()); + reader.read() + } +}