Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

remove match_ #3351

Draft
wants to merge 2 commits into
base: latest
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions src/core/src/ffi/index/revindex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,16 +207,13 @@
let threshold: usize = (threshold * (mh.size() as f64)) as _;

let counter = revindex.counter_for_query(mh);
dbg!(&counter);

let results: Vec<(f64, Signature, String)> = revindex
.gather(counter, threshold, mh)
.unwrap() // TODO: proper error handling
.into_iter()
.map(|r| {

Check warning on line 215 in src/core/src/ffi/index/revindex.rs

View workflow job for this annotation

GitHub Actions / minimum_rust_version

unused variable: `r`
let filename = r.filename().to_owned();
let sig = r.get_match();
(r.f_match(), sig, filename)
todo!()

Check warning on line 216 in src/core/src/ffi/index/revindex.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/ffi/index/revindex.rs#L216

Added line #L216 was not covered by tests
})
.collect();

Expand Down
2 changes: 0 additions & 2 deletions src/core/src/index/linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,6 @@ impl LinearIndex {
let intersect_bp: u64 = match_mh.scaled() as u64 * intersect_orig;

let f_unique_to_query = intersect_orig as f64 / query.size() as f64;
let match_ = match_sig;

// TODO: all of these
let f_unique_weighted = 0.;
Expand Down Expand Up @@ -217,7 +216,6 @@ impl LinearIndex {
filename,
name,
md5,
match_,
f_match_orig,
unique_intersect_bp,
gather_result_rank,
Expand Down
21 changes: 7 additions & 14 deletions src/core/src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,6 @@ pub struct GatherResult {
#[getset(get = "pub")]
md5: String,

#[serde(skip)]
match_: SigStore,

#[getset(get_copy = "pub")]
f_match_orig: f64,

Expand Down Expand Up @@ -118,12 +115,6 @@ pub struct GatherResult {
max_containment_ani: f64,
}

impl GatherResult {
pub fn get_match(&self) -> Signature {
self.match_.clone().into()
}
}

type SigCounter = counter::Counter<Idx>;

pub trait Index<'a> {
Expand Down Expand Up @@ -219,8 +210,11 @@ pub fn calculate_gather_stats(
calc_ani_ci: bool,
confidence: Option<f64>,
) -> Result<(GatherResult, (Vec<u64>, u64))> {
let match_filename = match_sig.filename();
let match_name = match_sig.name();
let match_md5 = match_sig.md5sum();
// get match_mh
let match_mh = match_sig.minhash().expect("cannot retrieve sketch");
let match_mh: KmerMinHash = match_sig.try_into()?;

// it's ok to downsample match, but query is often big and repeated,
// so we do not allow downsampling of query in this function.
Expand Down Expand Up @@ -330,10 +324,9 @@ pub fn calculate_gather_stats(
.average_abund(average_abund)
.median_abund(median_abund)
.std_abund(std_abund)
.filename(match_sig.filename())
.name(match_sig.name())
.md5(match_sig.md5sum())
.match_(match_sig)
.filename(match_filename)
.name(match_name)
.md5(match_md5)
.f_match_orig(f_match_orig)
.unique_intersect_bp(unique_intersect_bp)
.gather_result_rank(gather_result_rank)
Expand Down
31 changes: 17 additions & 14 deletions src/core/src/index/revindex/mem_revindex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,26 +208,29 @@
let mut matches = vec![];

while match_size > threshold && !counter.is_empty() {
let (dataset_id, size) = counter.most_common()[0];
let (dataset_id, size) = counter.k_most_common_ordered(1)[0];
match_size = if size >= threshold { size } else { break };
let result = self
.linear
.gather_round(dataset_id, match_size, query, matches.len())?;
if let Some(Sketch::MinHash(match_mh)) =
result.match_.select_sketch(self.linear.template())
{
// Prepare counter for finding the next match by decrementing
// all hashes found in the current match in other datasets
for hash in match_mh.iter_mins() {
if let Some(color) = self.hash_to_color.get(hash) {
counter.subtract(self.colors.indices(color).cloned());
}

// handle special case where threshold was set to 0
if match_size == 0 {
break;
}

let match_sig = self.linear.collection().sig_for_dataset(dataset_id)?;
let match_mh = match_sig.minhash().unwrap().clone();

// Prepare counter for finding the next match by decrementing
// all hashes found in the current match in other datasets
for hash in match_mh.iter_mins() {
if let Some(color) = self.hash_to_color.get(hash) {

Check warning on line 228 in src/core/src/index/revindex/mem_revindex.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/index/revindex/mem_revindex.rs#L228

Added line #L228 was not covered by tests
counter.subtract(self.colors.indices(color).cloned());
}
counter.remove(&dataset_id);
matches.push(result);
} else {
unimplemented!()
}
counter.remove(&dataset_id);
matches.push(result);
}
Ok(matches)
}
Expand Down
Loading