From 06867ac3cfabbabd393b4c9d3c1efa04d92b94da Mon Sep 17 00:00:00 2001 From: Luiz Irber Date: Mon, 11 Nov 2024 17:25:00 -0800 Subject: [PATCH] remove match_ from mem revindex gather, still working on FFI --- src/core/src/ffi/index/revindex.rs | 5 +--- src/core/src/index/revindex/mem_revindex.rs | 31 +++++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/core/src/ffi/index/revindex.rs b/src/core/src/ffi/index/revindex.rs index ef0e328139..db16e1d40b 100644 --- a/src/core/src/ffi/index/revindex.rs +++ b/src/core/src/ffi/index/revindex.rs @@ -207,16 +207,13 @@ unsafe fn revindex_gather( let threshold: usize = (threshold * (mh.size() as f64)) as _; let counter = revindex.counter_for_query(mh); - dbg!(&counter); let results: Vec<(f64, Signature, String)> = revindex .gather(counter, threshold, mh) .unwrap() // TODO: proper error handling .into_iter() .map(|r| { - let filename = r.filename().to_owned(); - let sig = r.get_match(); - (r.f_match(), sig, filename) + todo!() }) .collect(); diff --git a/src/core/src/index/revindex/mem_revindex.rs b/src/core/src/index/revindex/mem_revindex.rs index 08b7bc56ac..1bd1ba9db7 100644 --- a/src/core/src/index/revindex/mem_revindex.rs +++ b/src/core/src/index/revindex/mem_revindex.rs @@ -208,26 +208,29 @@ impl RevIndex { let mut matches = vec![]; while match_size > threshold && !counter.is_empty() { - let (dataset_id, size) = counter.most_common()[0]; + let (dataset_id, size) = counter.k_most_common_ordered(1)[0]; match_size = if size >= threshold { size } else { break }; let result = self .linear .gather_round(dataset_id, match_size, query, matches.len())?; - if let Some(Sketch::MinHash(match_mh)) = - result.match_.select_sketch(self.linear.template()) - { - // Prepare counter for finding the next match by decrementing - // all hashes found in the current match in other datasets - for hash in match_mh.iter_mins() { - if let Some(color) = self.hash_to_color.get(hash) { - counter.subtract(self.colors.indices(color).cloned()); - } + + // handle special case where threshold was set to 0 + if match_size == 0 { + break; + } + + let match_sig = self.linear.collection().sig_for_dataset(dataset_id)?; + let match_mh = match_sig.minhash().unwrap().clone(); + + // Prepare counter for finding the next match by decrementing + // all hashes found in the current match in other datasets + for hash in match_mh.iter_mins() { + if let Some(color) = self.hash_to_color.get(hash) { + counter.subtract(self.colors.indices(color).cloned()); } - counter.remove(&dataset_id); - matches.push(result); - } else { - unimplemented!() } + counter.remove(&dataset_id); + matches.push(result); } Ok(matches) }