Skip to content

Commit

Permalink
switch to using scaled; avoid unnecessary clones?
Browse files (browse the repository at this point in the history)
  • Loading branch information
ctb committed Oct 6, 2024
1 parent 4ab87e4 commit ffa9683
Showing 1 changed file with 17 additions and 14 deletions.
31 changes: 17 additions & 14 deletions src/core/src/sketch/minhash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -537,13 +537,13 @@ impl KmerMinHash {
}

pub fn count_common(&self, other: &KmerMinHash, downsample: bool) -> Result<u64, Error> {
if downsample && self.max_hash != other.max_hash {
let (first, second) = if self.max_hash < other.max_hash {
if downsample && self.scaled() != other.scaled() {
let (first, second) = if self.scaled() > other.scaled() {
(self, other)
} else {
(other, self)
};
let downsampled_mh = second.clone().downsample_max_hash(first.max_hash)?;
let downsampled_mh = second.clone().downsample_scaled(first.scaled())?;
first.count_common(&downsampled_mh, false)
} else {
self.check_compatible(other)?;
Expand Down Expand Up @@ -685,13 +685,14 @@ impl KmerMinHash {
ignore_abundance: bool,
downsample: bool,
) -> Result<f64, Error> {
if downsample && self.max_hash != other.max_hash {
let (first, second) = if self.max_hash < other.max_hash {
if downsample && self.scaled() != other.scaled() {
// downsample to the larger of the two scaled values
let (first, second) = if self.scaled() > other.scaled() {
(self, other)
} else {
(other, self)
};
let downsampled_mh = second.clone().downsample_max_hash(first.max_hash)?;
let downsampled_mh = second.clone().downsample_scaled(first.scaled())?;
first.similarity(&downsampled_mh, ignore_abundance, false)
} else if ignore_abundance || self.abunds.is_none() || other.abunds.is_none() {
self.jaccard(other)
Expand Down Expand Up @@ -771,7 +772,7 @@ impl KmerMinHash {
pub fn downsample_scaled(self, scaled: u64) -> Result<KmerMinHash, Error> {
// @CTB shouldn't we check that new scaled > old scaled?
if self.scaled() == scaled {
Ok(self.clone()) // avoid clone CTB
Ok(self)
} else {
let mut new_mh = KmerMinHash::new(
scaled,
Expand Down Expand Up @@ -1359,13 +1360,14 @@ impl KmerMinHashBTree {
}

pub fn count_common(&self, other: &KmerMinHashBTree, downsample: bool) -> Result<u64, Error> {
if downsample && self.max_hash != other.max_hash {
let (first, second) = if self.max_hash < other.max_hash {
if downsample && self.scaled() != other.scaled() {
// downsample to the larger of the two scaled values
let (first, second) = if self.scaled() > other.scaled() {
(self, other)
} else {
(other, self)
};
let downsampled_mh = second.clone().downsample_max_hash(first.max_hash)?;
let downsampled_mh = second.clone().downsample_scaled(first.scaled())?;
first.count_common(&downsampled_mh, false)
} else {
self.check_compatible(other)?;
Expand Down Expand Up @@ -1490,13 +1492,14 @@ impl KmerMinHashBTree {
ignore_abundance: bool,
downsample: bool,
) -> Result<f64, Error> {
if downsample && self.max_hash != other.max_hash {
let (first, second) = if self.max_hash < other.max_hash {
if downsample && self.scaled() != other.scaled() {
// downsample to the larger of the two scaled values
let (first, second) = if self.scaled() > other.scaled() {
(self, other)
} else {
(other, self)
};
let downsampled_mh = second.clone().downsample_max_hash(first.max_hash)?;
let downsampled_mh = second.clone().downsample_scaled(first.scaled())?;
first.similarity(&downsampled_mh, ignore_abundance, false)
} else if ignore_abundance || self.abunds.is_none() || other.abunds.is_none() {
self.jaccard(other)
Expand Down Expand Up @@ -1541,7 +1544,7 @@ impl KmerMinHashBTree {
pub fn downsample_scaled(self, scaled: u64) -> Result<KmerMinHashBTree, Error> {
// @CTB shouldn't we check that new scaled > old scaled?
if self.scaled() == scaled {
Ok(self.clone()) // CTB avoid clone...
Ok(self)
} else {
let mut new_mh = KmerMinHashBTree::new(
scaled,
Expand Down

0 comments on commit ffa9683

Please sign in to comment.