diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 65d5993..660341e 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -116,9 +116,9 @@ jobs: strategy: matrix: platform: - - runner: macos-12 + - runner: macos-latest target: x86_64 - - runner: macos-14 + - runner: macos-latest target: aarch64 steps: - uses: actions/checkout@v4 diff --git a/Cargo.lock b/Cargo.lock index 0f0e425..6750fba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,9 +89,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.91" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c042108f3ed77fd83760a5fd79b53be043192bb3b9dba91d8c574c0ada7850c8" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" [[package]] name = "approx" @@ -121,7 +121,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b64fd8980fb64af5951bc05de7772b598150a6f7eac42ec17f73e8489915f99b" dependencies = [ "flate2", - "thiserror", + "thiserror 1.0.63", ] [[package]] @@ -290,9 +290,9 @@ checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "csv" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" dependencies = [ "csv-core", "itoa", @@ -405,11 +405,11 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "histogram" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62b8d85713ddc62e5e78db13bf9f9305610d0419276faa845076a68b7165872" +checksum = "58cf6b99a250776d813cdf2f0b478a053a822d078e7a2baf5cb36afc88c41a7c" dependencies = [ - "thiserror", + "thiserror 1.0.63", ] [[package]] @@ -634,9 +634,9 @@ dependencies = [ [[package]] name = "needletail" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db05a5ab397f64070d8c998fa0fbb84e484b81f95752af317dac183a82d9295d" +checksum = "f29a3c5015d6985f33318d154fa0c41315eb2e7df29432c844c74a83434bfe21" dependencies = [ "buffer-redux", "bytecount", @@ -654,7 +654,7 @@ dependencies = [ "cfg-if", "flate2", "liblzma", - "thiserror", + "thiserror 1.0.63", "zstd", ] @@ -778,7 +778,7 @@ dependencies = [ "flate2", "log", "memchr", - "thiserror", + "thiserror 1.0.63", ] [[package]] @@ -857,9 +857,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.22.5" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d922163ba1f79c04bc49073ba7b32fd5a8d3b76a87c955921234b8e77333c51" +checksum = "7ebb0c0cc0de9678e53be9ccf8a2ab53045e6e3a8be03393ceccc5e7396ccb40" dependencies = [ "anyhow", "cfg-if", @@ -876,9 +876,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.5" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc38c5feeb496c8321091edf3d63e9a6829eab4b863b4a6a65f26f3e9cc6b179" +checksum = "80e3ce69c4ec34476534b490e412b871ba03a82e35604c3dfb95fcb6bfb60c09" dependencies = [ "once_cell", "target-lexicon", @@ -886,9 +886,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.5" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94845622d88ae274d2729fcefc850e63d7a3ddff5e3ce11bd88486db9f1d357d" +checksum = "3b09f311c76b36dfd6dd6f7fa6f9f18e7e46a1c937110d283e80b12ba2468a75" dependencies = [ "libc", "pyo3-build-config", @@ -896,9 +896,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.5" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e655aad15e09b94ffdb3ce3d217acf652e26bbc37697ef012f5e5e348c716e5e" +checksum = "fd4f74086536d1e1deaff99ec0387481fb3325c82e4e48be0e75ab3d3fcb487a" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -908,9 +908,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.22.5" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1e3f09eecd94618f60a455a23def79f79eba4dc561a97324bf9ac8c6df30ce" +checksum = "9e77dfeb76b32bbf069144a5ea0a36176ab59c8db9ce28732d0f06f096bbfbc8" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -1056,18 +1056,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.213" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.213" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", @@ -1076,9 +1076,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.132" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", "memchr", @@ -1119,9 +1119,9 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" -version = "0.16.0" +version = "0.17.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "596f20eac8896a06ca65889399ea6f408deeba375aa44c4a2efb3b46e31a02c0" +checksum = "54e30f752d984b1d8456024973f8d89772b4ba248f592b77b57d59ad27a232a0" dependencies = [ "az", "byteorder", @@ -1155,7 +1155,7 @@ dependencies = [ "serde_json", "statrs", "streaming-stats", - "thiserror", + "thiserror 2.0.3", "twox-hash", "typed-builder", "vec-collections", @@ -1192,9 +1192,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.85" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", @@ -1213,7 +1213,16 @@ version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.63", +] + +[[package]] +name = "thiserror" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +dependencies = [ + "thiserror-impl 2.0.3", ] [[package]] @@ -1227,6 +1236,17 @@ dependencies = [ "syn", ] +[[package]] +name = "thiserror-impl" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "twox-hash" version = "1.6.3" diff --git a/Cargo.toml b/Cargo.toml index 76260e2..f7be020 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,12 +9,12 @@ name = "oxli" crate-type = ["cdylib"] [dependencies] -anyhow = "1.0.91" +anyhow = "1.0.93" env_logger = "0.11.5" log = "0.4.22" niffler = "2.6.0" -pyo3 = { version="0.22.5", features = ["extension-module", "anyhow"] } +pyo3 = { version="0.23.1", features = ["extension-module", "anyhow"] } rayon = "1.10.0" -serde = { version = "1.0.213", features = ["derive"] } -serde_json = "1.0.132" -sourmash = "0.16.0" \ No newline at end of file +serde = { version = "1.0.215", features = ["derive"] } +serde_json = "1.0.133" +sourmash = "0.17.2" \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index c76424d..5da448d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -84,7 +84,7 @@ impl KmerCountTable { pub fn unhash(&self, hash: u64) -> PyResult { if self.store_kmers { if let Some(kmer) = self.hash_to_kmer.as_ref().unwrap().get(&hash) { - return Ok(kmer.clone()); + Ok(kmer.clone()) } else { // Raise KeyError if hash does not exist let msg = format!("Warning: Hash {} not found in table.", hash); @@ -175,10 +175,7 @@ impl KmerCountTable { } else { let hashval = self.hash_kmer(kmer).expect("error hashing this k-mer"); - let count = match self.counts.get(&hashval) { - Some(count) => count, - None => &0, - }; + let count = self.counts.get(&hashval).unwrap_or(&0); debug!("get: hashval {}, count {}", hashval, count); Ok(*count) } @@ -545,26 +542,25 @@ impl KmerCountTable { // else if "false" consume kmers until a bad kmer in encountered, then // exit with error. #[pyo3(signature = (seq, skip_bad_kmers=true))] - pub fn consume(&mut self, seq: String, skip_bad_kmers: bool) -> PyResult { + pub fn consume(&mut self, seq: &str, skip_bad_kmers: bool) -> PyResult { // Incoming seq len let new_len = seq.len(); // Init tally for consumed kmers let mut n = 0; // If store_kmers is true, then count & log hash:kmer pairs if self.store_kmers { + let hash_to_kmer = self.hash_to_kmer.as_mut().unwrap(); + // Create an iterator for (canonical_kmer, hash) pairs - let mut iter = KmersAndHashesIter::new(seq, self.ksize as usize, skip_bad_kmers); + let iter = KmersAndHashesIter::new(seq, self.ksize as usize, skip_bad_kmers); // Iterate over the k-mers and their hashes - while let Some(result) = iter.next() { + for result in iter { match result { Ok((kmer, hash)) => { if hash != 0 { // Insert hash:kmer pair into the hashmap - self.hash_to_kmer - .as_mut() - .unwrap() - .insert(hash, kmer.clone()); + hash_to_kmer.insert(hash, kmer.clone()); // Increment the count for the hash *self.counts.entry(hash).or_insert(0) += 1; // Tally kmers added @@ -591,7 +587,6 @@ impl KmerCountTable { Ok(0) => continue, Ok(x) => { self.count_hash(x); - () } Err(_) => { let msg = format!("bad k-mer encountered at position {}", n); @@ -686,16 +681,16 @@ impl KmerCountTable { #[pyo3(signature = (seq, skip_bad_kmers=true))] pub fn kmers_and_hashes( &self, - seq: String, + seq: &str, skip_bad_kmers: bool, ) -> PyResult> { let mut v: Vec<(String, u64)> = vec![]; // Create the iterator - let mut iter = KmersAndHashesIter::new(seq, self.ksize as usize, skip_bad_kmers); + let iter = KmersAndHashesIter::new(seq, self.ksize as usize, skip_bad_kmers); // Collect the k-mers and their hashes - while let Some(result) = iter.next() { + for result in iter { match result { Ok((kmer, hash)) => v.push((kmer, hash)), Err(e) => return Err(e), @@ -880,7 +875,7 @@ pub struct KmersAndHashesIter { } impl KmersAndHashesIter { - pub fn new(seq: String, ksize: usize, skip_bad_kmers: bool) -> Self { + pub fn new(seq: &str, ksize: usize, skip_bad_kmers: bool) -> Self { let seq = seq.to_ascii_uppercase(); // Ensure uppercase for uniformity let seqb = seq.as_bytes().to_vec(); // Convert to bytes for hashing let seqb_rc = revcomp(&seqb); @@ -891,7 +886,7 @@ impl KmersAndHashesIter { let end = seq.len() - ksize + 1; // Calculate the endpoint for k-mer extraction let hasher = SeqToHashes::new( &seqb, - ksize.into(), + ksize, true, // Set force to true, bad kmers will emit hash=0 instead of killing process false, // Other flags, e.g., reverse complement HashFunctions::Murmur64Dna,