Skip to content

Commit

Permalink
Add a NoQuantizer for when we don't want to quantize (#153)
Browse files Browse the repository at this point in the history
  • Loading branch information
hicder authored Dec 2, 2024
1 parent d0a7c65 commit 050cdfb
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 2 deletions.
2 changes: 1 addition & 1 deletion rs/index/src/hnsw/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,7 @@ mod tests {
vectors.append(&vec![2, 2, 2, 2, 2]).unwrap();

let mut builder = HnswBuilder {
vectors: vectors,
vectors,
max_neighbors: 1,
layers: vec![layer],
current_top_layer: 0,
Expand Down
1 change: 0 additions & 1 deletion rs/index/src/hnsw/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ impl<T: VectorT<Q>, Q: Quantizer> Hnsw<T, Q> {
// let pq = pq_reader.read().unwrap();

let quantizer = Q::read(quantizer_directory).unwrap();

let index_mmap = unsafe { Mmap::map(&backing_file).unwrap() };

Self {
Expand Down
42 changes: 42 additions & 0 deletions rs/index/src/hnsw/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ impl HnswReader {
mod tests {
use std::fs;

use quantization::no_op::{NoQuantizer, NoQuantizerWriter};
use quantization::pq::{ProductQuantizer, ProductQuantizerConfig, ProductQuantizerWriter};
use quantization::pq_builder::{ProductQuantizerBuilder, ProductQuantizerBuilderConfig};
use utils::test_utils::generate_random_vector;
Expand Down Expand Up @@ -165,4 +166,45 @@ mod tests {
assert_eq!(49, hnsw.get_data_offset());
assert_eq!(16, hnsw.get_header().quantized_dimension);
}

/// Builds an HNSW index with a `NoQuantizer`, writes it to disk, and checks
/// that the index read back reports the expected header values.
#[test]
fn test_read_no_op_quantizer() {
    let temp_dir = tempdir::TempDir::new("product_quantizer_test").unwrap();
    let base_directory = temp_dir.path().to_str().unwrap().to_string();
    let vector_dir = format!("{}/vectors", base_directory);
    fs::create_dir_all(&vector_dir).unwrap();
    let datapoints: Vec<Vec<f32>> = (0..10000).map(|_| generate_random_vector(128)).collect();

    // Write the quantizer config under `<base>/quantizer` before building the index.
    let quantizer = NoQuantizer::new(128);
    let quantizer_dir = format!("{}/quantizer", base_directory);
    fs::create_dir_all(&quantizer_dir).unwrap();
    let quantizer_writer = NoQuantizerWriter::new(quantizer_dir);
    quantizer_writer.write(&quantizer).unwrap();

    let mut hnsw_builder = HnswBuilder::<f32, NoQuantizer>::new(
        10, 128, 20, 1024, 4096, 128, quantizer, vector_dir,
    );
    // Each vector's id is its position in `datapoints`.
    for (i, datapoint) in datapoints.iter().enumerate() {
        hnsw_builder.insert(i as u64, datapoint).unwrap();
    }

    let hnsw_dir = format!("{}/hnsw", base_directory);
    fs::create_dir_all(&hnsw_dir).unwrap();
    let writer = HnswWriter::new(hnsw_dir);
    // Fail with the underlying error instead of an opaque `assert!(false)`.
    writer
        .write(&mut hnsw_builder, false)
        .expect("writing the HNSW index should succeed");

    // Read from file
    let reader = HnswReader::new(base_directory.clone());
    let hnsw = reader.read::<f32, NoQuantizer>();
    assert_eq!(49, hnsw.get_data_offset());
    // Expected to match the dimension given to `NoQuantizer::new` above.
    assert_eq!(128, hnsw.get_header().quantized_dimension);
}
}
2 changes: 2 additions & 0 deletions rs/quantization/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ pub mod rabitq;
pub mod rabitq_builder;
pub mod quantization;
pub mod typing;
pub mod no_op;
pub mod reader;
94 changes: 94 additions & 0 deletions rs/quantization/src/no_op.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};

use crate::quantization::Quantizer;

/// Quantizer stand-in for indexes that do not quantize their vectors.
///
/// It carries no codebook; the only state is the dimension it was created with.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NoQuantizer {
    dimension: usize,
}

impl NoQuantizer {
    /// Creates a `NoQuantizer` that records `dimension`.
    pub fn new(dimension: usize) -> Self {
        Self { dimension }
    }
}

/// `Quantizer` implementation for the non-quantizing case.
///
/// Only `quantized_dimension` and `read` are functional. The operations that
/// work on quantized bytes are deliberately unsupported and panic when called.
impl Quantizer for NoQuantizer {
    /// Unsupported for `NoQuantizer`.
    ///
    /// # Panics
    /// Always panics.
    fn quantize(&self, _value: &[f32]) -> Vec<u8> {
        unimplemented!("NoQuantizer does not support quantize()")
    }

    /// Returns the dimension this quantizer was created with.
    fn quantized_dimension(&self) -> usize {
        self.dimension
    }

    /// Unsupported for `NoQuantizer`.
    ///
    /// # Panics
    /// Always panics.
    fn original_vector(&self, _quantized_vector: &[u8]) -> Vec<f32> {
        unimplemented!("NoQuantizer does not support original_vector()")
    }

    /// Unsupported for `NoQuantizer`.
    ///
    /// # Panics
    /// Always panics.
    fn distance(
        &self,
        _query: &[u8],
        _point: &[u8],
        _implem: utils::distance::l2::L2DistanceCalculatorImpl,
    ) -> f32 {
        unimplemented!("NoQuantizer does not support distance()")
    }

    /// Loads a `NoQuantizer` from the config file stored in `dir`.
    ///
    /// # Errors
    /// Propagates any error returned by `NoQuantizerReader::read`.
    fn read(dir: String) -> Result<Self>
    where
        Self: Sized,
    {
        NoQuantizerReader::new(dir).read()
    }
}

/// Serializable form of a `NoQuantizer`, stored as YAML in
/// `no_op_quantizer_config.yaml`.
#[derive(Serialize, Deserialize)]
pub struct NoQuantizerConfig {
    // Copied from / into `NoQuantizer::dimension`.
    dimension: usize,
}

/// Loads a `NoQuantizer` from the YAML config file stored in a directory.
pub struct NoQuantizerReader {
    base_directory: String,
}

impl NoQuantizerReader {
    /// Creates a reader that looks for the config file inside `base_directory`.
    pub fn new(base_directory: String) -> Self {
        Self { base_directory }
    }

    /// Reads `no_op_quantizer_config.yaml` from the base directory and rebuilds
    /// the quantizer from it.
    ///
    /// # Errors
    /// Returns an error if the file cannot be read or does not deserialize
    /// into a `NoQuantizerConfig`.
    pub fn read(&self) -> Result<NoQuantizer> {
        // Join path components rather than formatting a string, so an empty
        // base directory resolves to a relative path instead of the filesystem root.
        let config_path =
            std::path::Path::new(&self.base_directory).join("no_op_quantizer_config.yaml");

        // Deserialize the config.
        let contents = std::fs::read_to_string(config_path)?;
        let config: NoQuantizerConfig = serde_yaml::from_str(&contents)?;
        Ok(NoQuantizer::new(config.dimension))
    }
}

// Writer

/// Persists a `NoQuantizer` as a YAML config file inside a directory.
pub struct NoQuantizerWriter {
    base_directory: String,
}

impl NoQuantizerWriter {
    /// Creates a writer that places the config file inside `base_directory`.
    pub fn new(base_directory: String) -> Self {
        Self { base_directory }
    }

    /// Serializes `quantizer` to `no_op_quantizer_config.yaml` in the base directory.
    ///
    /// # Errors
    /// Returns an error if YAML serialization fails or the file cannot be written.
    pub fn write(&self, quantizer: &NoQuantizer) -> Result<()> {
        let config = NoQuantizerConfig {
            dimension: quantizer.dimension,
        };
        // Join path components rather than formatting a string, so an empty
        // base directory resolves to a relative path instead of the filesystem root.
        let config_path =
            std::path::Path::new(&self.base_directory).join("no_op_quantizer_config.yaml");
        std::fs::write(config_path, serde_yaml::to_string(&config)?)?;
        Ok(())
    }
}
42 changes: 42 additions & 0 deletions rs/quantization/src/reader.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
use std::path::Path;

use anyhow::Result;

use crate::no_op::{NoQuantizer, NoQuantizerReader};
use crate::pq::{ProductQuantizer, ProductQuantizerReader};

/// Entry point for inspecting and loading whichever quantizer is stored in a
/// directory.
pub struct QuantizationReader {
    // Directory that is probed for quantizer config files.
    base_directory: String,
}

/// Kind of quantizer reported by `QuantizationReader::get_quantization_type`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuantizationType {
    /// A product quantizer config file was found.
    ProductQuantizer,
    /// No product quantizer config file was found.
    NoQuantization,
}

impl QuantizationReader {
    /// Creates a reader rooted at `base_directory`.
    pub fn new(base_directory: String) -> Self {
        Self { base_directory }
    }

    /// Reports which quantizer the base directory holds.
    ///
    /// A directory containing `product_quantizer_config.yaml` is treated as a
    /// product quantizer; anything else falls back to
    /// `QuantizationType::NoQuantization`.
    pub fn get_quantization_type(&self) -> QuantizationType {
        let pq_config = Path::new(&self.base_directory).join("product_quantizer_config.yaml");
        if pq_config.exists() {
            QuantizationType::ProductQuantizer
        } else {
            QuantizationType::NoQuantization
        }
    }

    /// Loads the product quantizer stored in the base directory.
    pub fn read_product_quantizer(&self) -> Result<ProductQuantizer> {
        ProductQuantizerReader::new(self.base_directory.clone()).read()
    }

    /// Loads the `NoQuantizer` stored in the base directory.
    pub fn read_no_quantization(&self) -> Result<NoQuantizer> {
        NoQuantizerReader::new(self.base_directory.clone()).read()
    }
}

0 comments on commit 050cdfb

Please sign in to comment.