From 816a1241f01e26169b1637231f8463dbfddbd14c Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 4 Sep 2024 06:32:50 -0700 Subject: [PATCH 1/3] stop using _hash_murmur --- Cargo.lock | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 + src/lib.rs | 6 ++- 3 files changed, 151 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bb07583..8d017ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,15 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "aliasable" version = "0.1.3" @@ -29,6 +38,55 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" + +[[package]] +name = "anstyle-parse" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +dependencies = [ + "anstyle", + "windows-sys", +] + [[package]] name = "anyhow" version = "1.0.86" @@ -142,6 +200,12 @@ dependencies = [ "csv", ] +[[package]] +name = "colorchoice" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -205,6 +269,29 @@ dependencies = [ "syn 2.0.77", ] +[[package]] +name = "env_filter" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "humantime", + "log", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -267,6 +354,12 @@ dependencies = [ "thiserror", ] +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "iana-time-zone" version = "0.1.60" @@ -305,6 +398,12 @@ dependencies = [ "smallvec", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itertools" version = "0.12.1" @@ -559,6 +658,8 @@ name = "oxli" version = "0.2.2" dependencies = [ "anyhow", + "env_logger", + "log", "pyo3", "sourmash", ] @@ -774,6 +875,35 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" +[[package]] +name = "regex" +version = "1.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + [[package]] name = "roaring" version = "0.10.6" @@ -1038,6 +1168,12 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "vec-collections" version = "0.4.3" @@ -1149,6 +1285,15 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-targets" version = "0.52.6" diff --git a/Cargo.toml b/Cargo.toml index b0043a7..fa7cf87 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,3 +12,5 @@ crate-type = ["cdylib"] pyo3 = { version="0.22.2", features = ["extension-module", "anyhow"] } sourmash = "0.15.1" anyhow = "1.0.86" +log = "0.4.22" +env_logger = "0.11.5" diff --git a/src/lib.rs b/src/lib.rs index 5cc9322..f8ac399 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,10 +3,10 @@ use pyo3::prelude::*; // use rayon::prelude::*; use anyhow::{anyhow, Result}; +use log::debug; use std::collections::HashMap; // use sourmash::sketch::nodegraph::Nodegraph; -use sourmash::_hash_murmur; use sourmash::encodings::HashFunctions; use sourmash::signature::SeqToHashes; @@ -63,7 +63,7 @@ impl KmerCountTable { "kmer size does not match count table ksize", )) } else { - let hashval = _hash_murmur(kmer.as_bytes(), 42); + let hashval = self.hash_kmer(kmer).unwrap(); let count = self.count_hash(hashval); Ok(count) } @@ -81,6 +81,7 @@ impl KmerCountTable { Some(count) => count, None => &0, }; + debug!("get: hashval {}, count {}", hashval, count); Ok(*count) } } @@ -121,6 +122,7 @@ impl KmerCountTable { #[pymodule] fn oxli(m: &Bound<'_, PyModule>) -> PyResult<()> { + env_logger::init(); m.add_class::()?; Ok(()) } From 267b60660c65d97a846e24e4b16405be4b363bcc Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 4 Sep 2024 06:35:51 -0700 Subject: [PATCH 2/3] add test --- src/python/tests/test_basic.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/python/tests/test_basic.py b/src/python/tests/test_basic.py index 03e5325..edc2a8d 100644 --- a/src/python/tests/test_basic.py +++ b/src/python/tests/test_basic.py @@ -79,3 +79,19 @@ def test_consume_bad_DNA_ignore_is_default(): assert cg.get("ATCG") == 1 assert cg.get("TCGG") == 1 assert cg.get("CCGA") == 1 # rc + + +def test_count_get(): + # test a bug reported by adam taranto: count and get should work together! + kmer = 'TAAACCCTAACCCTAACCCTAACCCTAACCC' + + cg = oxli.KmerCountTable(ksize=31) + hashkey = cg.hash_kmer(kmer) + + assert cg.get(kmer) == 0 + assert cg.count(kmer) == 1 + assert cg.count(kmer) == 2 + + x = cg.get(kmer) + assert x == 2, x + From 02001e419cc64eb4523017d7335109b7e5c41560 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 4 Sep 2024 06:36:39 -0700 Subject: [PATCH 3/3] bump to v0.2.3 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8d017ca..4e355c4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -655,7 +655,7 @@ dependencies = [ [[package]] name = "oxli" -version = "0.2.2" +version = "0.2.3" dependencies = [ "anyhow", "env_logger", diff --git a/Cargo.toml b/Cargo.toml index fa7cf87..54451f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxli" -version = "0.2.2" +version = "0.2.3" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html