Skip to content

Commit

Permalink
initial update impl
Browse files Browse the repository at this point in the history
  • Loading branch information
luizirber committed Sep 25, 2022
1 parent 6730509 commit e2c9e92
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 0 deletions.
13 changes: 13 additions & 0 deletions src/core/src/index/revindex/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,19 @@ impl RevIndex {
}
}

pub fn update(
&self,
index_sigs: Vec<PathBuf>,
template: &Sketch,
threshold: f64,
save_paths: bool,
) {
match self {
//Self::Color(db) => db.update(index_sigs, template, threshold, save_paths),
Self::Plain(db) => db.update(index_sigs, template, threshold, save_paths),
}
}

pub fn compact(&self) {
match self {
//Self::Color(db) => db.compact(),
Expand Down
61 changes: 61 additions & 0 deletions src/core/src/index/revindex/revindex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,67 @@ impl RevIndex {
info!("Processed {} reference sigs", processed_sigs.into_inner());
}

pub fn update(
&self,
index_sigs: Vec<PathBuf>,
template: &Sketch,
threshold: f64,
save_paths: bool,
) {
use byteorder::ReadBytesExt;

if !save_paths {
todo!("only supports with save_paths=True for now");
}

let cf_sigs = self.db.cf_handle(SIGS).unwrap();
let iter = self.db.iterator_cf(&cf_sigs, rocksdb::IteratorMode::Start);

// verify data match up to this point
let to_skip = iter
.zip(index_sigs.iter().enumerate())
.map(|((key, value), (dataset_id, filename))| {
let current_dataset_id = (&key[..]).read_u64::<LittleEndian>().unwrap();
assert_eq!(current_dataset_id, dataset_id as u64);

let sig_data = SignatureData::from_slice(&value).unwrap();
assert_eq!(
sig_data,
SignatureData::External(filename.as_os_str().to_str().unwrap().to_string())
);
})
.count();

// process the remainder
let processed_sigs = AtomicUsize::new(0);

index_sigs
.par_iter()
.skip(to_skip)
.enumerate()
.for_each(|(i, filename)| {
let dataset_id = i + to_skip;

let i = processed_sigs.fetch_add(1, Ordering::SeqCst);
if i % 1000 == 0 {
info!("Processed {} reference sigs", i);
}

self.map_hashes_colors(
dataset_id as DatasetID,
filename,
threshold,
template,
save_paths,
);
});

info!(
"Processed additional {} reference sigs",
processed_sigs.into_inner()
);
}

pub fn check(&self, quick: bool) {
stats_for_cf(self.db.clone(), HASHES, true, quick);
info!("");
Expand Down

0 comments on commit e2c9e92

Please sign in to comment.