Skip to content

Commit

Permalink
IntSeqEncoder trait and PlainEncoder implementation (#239)
Browse files Browse the repository at this point in the history
This trait is for implementing different encoding algorithms to compress posting lists
(or any sorted integer sequences)
  • Loading branch information
tyb0807 authored Dec 30, 2024
1 parent 8bcff77 commit f9a4678
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions rs/compression/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ bitvec = "1"
env_logger.workspace = true
log.workspace = true
tempdir.workspace = true
utils.workspace = true
20 changes: 20 additions & 0 deletions rs/compression/src/compression.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
use std::fs::File;
use std::io::BufWriter;

use anyhow::Result;

/// Common interface for encoders of sorted integer sequences (e.g. posting lists).
///
/// An implementation is created with [`IntSeqEncoder::new_encoder`], fed values through
/// [`IntSeqEncoder::encode`], and serialized with [`IntSeqEncoder::write`].
pub trait IntSeqEncoder {
    /// Creates an encoder.
    ///
    /// `universe` is optional and `size` is the expected number of elements; how each is
    /// used is up to the implementation.
    fn new_encoder(universe: Option<usize>, size: usize) -> Self
    where
        Self: Sized;

    /// Compresses a sorted slice of integers.
    fn encode(&mut self, values: &[u64]) -> Result<()>;

    /// Returns the number of elements in the sequence.
    fn len(&self) -> usize;

    /// Returns `true` when the sequence contains no elements.
    ///
    /// Provided with a default so existing implementors keep compiling; it simply checks
    /// `len() == 0` and may be overridden when a cheaper test exists.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Writes the encoded sequence to `writer` and returns the number of bytes written.
    fn write(&self, writer: &mut BufWriter<&mut File>) -> Result<usize>;
}
2 changes: 2 additions & 0 deletions rs/compression/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
pub mod compression;
pub mod elias_fano;
pub mod noc;
1 change: 1 addition & 0 deletions rs/compression/src/noc/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod noc;
48 changes: 48 additions & 0 deletions rs/compression/src/noc/noc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use std::fs::File;
use std::io::{BufWriter, Write};

use anyhow::Result;
use utils::io::wrap_write;

use crate::compression::IntSeqEncoder;

/// Encoder that keeps the integer sequence uncompressed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PlainEncoder {
    /// Element count reported by `len()`.
    size: usize,
    /// Values held for serialization.
    sequence: Vec<u64>,
}

impl PlainEncoder {
pub fn new(size: usize) -> Self {
Self {
size,
sequence: Vec::new(),
}
}
}

impl IntSeqEncoder for PlainEncoder {
    /// Creates a plain encoder expecting `size` elements; `_universe` is not used.
    fn new_encoder(_universe: Option<usize>, size: usize) -> Self {
        Self::new(size)
    }

    /// Stores a copy of `values`, replacing whatever an earlier call stored.
    ///
    /// The element count is refreshed from the stored data so that `len()` always agrees
    /// with what `write` emits, even if the size given at construction differed.
    fn encode(&mut self, values: &[u64]) -> Result<()> {
        // Clear-and-extend keeps the existing allocation instead of building a new Vec.
        self.sequence.clear();
        self.sequence.extend_from_slice(values);
        self.size = self.sequence.len();
        Ok(())
    }

    /// Returns the element count: the constructor's `size` until `encode` is called,
    /// afterwards the number of values actually stored.
    fn len(&self) -> usize {
        self.size
    }

    /// Writes every stored value as little-endian bytes through `wrap_write`, flushes
    /// `writer`, and returns the sum of the byte counts reported by `wrap_write`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `wrap_write` or from flushing the writer.
    fn write(&self, writer: &mut BufWriter<&mut File>) -> Result<usize> {
        let mut total_bytes_written = 0;

        for val in &self.sequence {
            total_bytes_written += wrap_write(writer, &val.to_le_bytes())?;
        }

        // Flush explicitly: errors raised while dropping a BufWriter are silently lost.
        writer.flush()?;

        Ok(total_bytes_written)
    }
}

0 comments on commit f9a4678

Please sign in to comment.