diff --git a/Cargo.lock b/Cargo.lock index 8463986..0f0e425 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,9 +89,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.89" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" +checksum = "c042108f3ed77fd83760a5fd79b53be043192bb3b9dba91d8c574c0ada7850c8" [[package]] name = "approx" @@ -497,9 +497,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.70" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" dependencies = [ "wasm-bindgen", ] @@ -857,9 +857,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00e89ce2565d6044ca31a3eb79a334c3a79a841120a98f64eea9f579564cb691" +checksum = "3d922163ba1f79c04bc49073ba7b32fd5a8d3b76a87c955921234b8e77333c51" dependencies = [ "anyhow", "cfg-if", @@ -876,9 +876,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8afbaf3abd7325e08f35ffb8deb5892046fcb2608b703db6a583a5ba4cea01e" +checksum = "bc38c5feeb496c8321091edf3d63e9a6829eab4b863b4a6a65f26f3e9cc6b179" dependencies = [ "once_cell", "target-lexicon", @@ -886,9 +886,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec15a5ba277339d04763f4c23d85987a5b08cbb494860be141e6a10a8eb88022" +checksum = "94845622d88ae274d2729fcefc850e63d7a3ddff5e3ce11bd88486db9f1d357d" dependencies = [ "libc", "pyo3-build-config", @@ -896,9 +896,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e0f01b5364bcfbb686a52fc4181d412b708a68ed20c330db9fc8d2c2bf5a43" +checksum = "e655aad15e09b94ffdb3ce3d217acf652e26bbc37697ef012f5e5e348c716e5e" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -908,9 +908,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a09b550200e1e5ed9176976d0060cbc2ea82dc8515da07885e7b8153a85caacb" +checksum = "ae1e3f09eecd94618f60a455a23def79f79eba4dc561a97324bf9ac8c6df30ce" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -1056,18 +1056,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.210" +version = "1.0.213" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.213" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" dependencies = [ "proc-macro2", "quote", @@ -1076,9 +1076,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ "itoa", "memchr", @@ -1119,9 +1119,9 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" -version = "0.15.2" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a73bae93170d8d0f816e18b6a630d76e134b90958850985ee2f0fb2f641d4de" +checksum = "596f20eac8896a06ca65889399ea6f408deeba375aa44c4a2efb3b46e31a02c0" dependencies = [ "az", "byteorder", @@ -1192,9 +1192,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.77" +version = "2.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" +checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" dependencies = [ "proc-macro2", "quote", @@ -1311,9 +1311,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" dependencies = [ "cfg-if", "once_cell", @@ -1322,9 +1322,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" dependencies = [ "bumpalo", "log", @@ -1337,9 +1337,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1347,9 +1347,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", @@ -1360,15 +1360,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.93" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "web-sys" -version = "0.3.70" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index b1b5f47..76260e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,12 +9,12 @@ name = "oxli" crate-type = ["cdylib"] [dependencies] -anyhow = "1.0.89" +anyhow = "1.0.91" env_logger = "0.11.5" log = "0.4.22" niffler = "2.6.0" -pyo3 = { version="0.22.4", features = ["extension-module", "anyhow"] } +pyo3 = { version="0.22.5", features = ["extension-module", "anyhow"] } rayon = "1.10.0" -serde = { version = "1.0.210", features = ["derive"] } -serde_json = "1.0.128" -sourmash = "0.15.2" \ No newline at end of file +serde = { version = "1.0.213", features = ["derive"] } +serde_json = "1.0.132" +sourmash = "0.16.0" \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 71810e6..c76424d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -871,7 +871,7 @@ impl KmerCountTableIterator { pub struct KmersAndHashesIter { seq: String, // The sequence to iterate over - seqb: Vec, // Sequence bytes + seq_rc: String, // reverse complement sequence ksize: usize, // K-mer size pos: usize, // Current position in the sequence end: usize, // The end position for k-mer extraction @@ -883,8 +883,12 @@ impl KmersAndHashesIter { pub fn new(seq: String, ksize: usize, skip_bad_kmers: bool) -> Self { let seq = seq.to_ascii_uppercase(); // Ensure uppercase for uniformity let seqb = seq.as_bytes().to_vec(); // Convert to bytes for hashing - let end = seq.len() - ksize + 1; // Calculate the endpoint for k-mer extraction + let seqb_rc = revcomp(&seqb); + let seq_rc = std::str::from_utf8(&seqb_rc) + .expect("invalid utf-8 sequence for rev comp") + .to_string(); + let end = seq.len() - ksize + 1; // Calculate the endpoint for k-mer extraction let hasher = SeqToHashes::new( &seqb, ksize.into(), @@ -896,7 +900,7 @@ impl KmersAndHashesIter { Self { seq, - seqb, + seq_rc, ksize, pos: 0, // Start at the beginning of the sequence end, @@ -917,15 +921,11 @@ impl Iterator for KmersAndHashesIter { let start = self.pos; let ksize = self.ksize; + let rpos = self.end - start - 1; // Extract the current k-mer and its reverse complement let substr = &self.seq[start..start + ksize]; - // CTB: this calculates RC each time, instead of doing so - // using a sliding window. It's easy and works, so I'm - // starting here :). - let substr_b_rc = revcomp(&self.seqb[start..start + ksize]); - let substr_rc = - std::str::from_utf8(&substr_b_rc).expect("invalid utf-8 sequence for rev comp"); + let substr_rc = &self.seq_rc[rpos..rpos + ksize]; // Get the next hash value from the hasher let hashval = self.hasher.next().expect("should not run out of hashes"); @@ -943,7 +943,7 @@ impl Iterator for KmersAndHashesIter { } else { substr_rc }; - // If vaild hash, return (canonical_kmer,hashval) tuple + // If valid hash, return (canonical_kmer,hashval) tuple Some(Ok((canonical_kmer.to_string(), hashval))) } else { // If the hash is 0, handle based on `skip_bad_kmers`