From 7129c55e73fe2257b46d629cd2c52680526c85a7 Mon Sep 17 00:00:00 2001
From: Adam Taranto
Date: Sun, 22 Sep 2024 13:32:54 +1000
Subject: [PATCH] Jaccard Similarity Coefficient

---
 src/lib.rs | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index f90502f..00875d9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -441,13 +441,26 @@ impl KmerCountTable {
         Ok(())
     }
 
-    // Jaccard
+    /// Jaccard similarity coefficient between two `KmerCountTable` objects.
+    ///
+    /// Takes the sizes of `self.intersection(other)` and `self.union(other)` and
+    /// divides the former by the latter.
+    ///
+    /// # Returns
+    /// The intersection-to-union size ratio as an `f64`; `1.0` when the union is
+    /// empty, since two empty sets are treated as identical by convention.
+    pub fn jaccard(&self, other: &KmerCountTable) -> f64 {
+        let shared = self.intersection(other).len();
+        let combined = self.union(other).len();
+
+        match combined {
+            // Empty union: avoid 0/0 and report the two empty tables as identical.
+            0 => 1.0,
+            total => shared as f64 / total as f64,
+        }
+    }
 
     /// Cosine similarity between two `KmerCountTable` objects.
-    ///
-    /// # Arguments
-    /// * `other` - The second `KmerCountTable` to compare against.
-    ///
     /// # Returns
     /// The cosine similarity between the two tables as a float value between 0 and 1.
     pub fn cosine(&self, other: &KmerCountTable) -> f64 {