Skip to content

Commit

Permalink
Merge branch 'main' into add_and_substract
Browse files Browse the repository at this point in the history
  • Loading branch information
Adamtaranto authored Nov 19, 2024
2 parents 2d0cc85 + da6051e commit 5e259bb
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 59 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ jobs:
strategy:
matrix:
platform:
- runner: macos-12
- runner: macos-latest
target: x86_64
- runner: macos-14
- runner: macos-latest
target: aarch64
steps:
- uses: actions/checkout@v4
Expand Down
88 changes: 54 additions & 34 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ name = "oxli"
crate-type = ["cdylib"]

[dependencies]
anyhow = "1.0.91"
anyhow = "1.0.93"
env_logger = "0.11.5"
log = "0.4.22"
niffler = "2.6.0"
pyo3 = { version="0.22.5", features = ["extension-module", "anyhow"] }
pyo3 = { version="0.23.1", features = ["extension-module", "anyhow"] }
rayon = "1.10.0"
serde = { version = "1.0.213", features = ["derive"] }
serde_json = "1.0.132"
sourmash = "0.16.0"
serde = { version = "1.0.215", features = ["derive"] }
serde_json = "1.0.133"
sourmash = "0.17.2"
31 changes: 13 additions & 18 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ impl KmerCountTable {
pub fn unhash(&self, hash: u64) -> PyResult<String> {
if self.store_kmers {
if let Some(kmer) = self.hash_to_kmer.as_ref().unwrap().get(&hash) {
return Ok(kmer.clone());
Ok(kmer.clone())
} else {
// Raise KeyError if hash does not exist
let msg = format!("Warning: Hash {} not found in table.", hash);
Expand Down Expand Up @@ -175,10 +175,7 @@ impl KmerCountTable {
} else {
let hashval = self.hash_kmer(kmer).expect("error hashing this k-mer");

let count = match self.counts.get(&hashval) {
Some(count) => count,
None => &0,
};
let count = self.counts.get(&hashval).unwrap_or(&0);
debug!("get: hashval {}, count {}", hashval, count);
Ok(*count)
}
Expand Down Expand Up @@ -545,26 +542,25 @@ impl KmerCountTable {
// else if "false" consume kmers until a bad kmer is encountered, then
// exit with error.
#[pyo3(signature = (seq, skip_bad_kmers=true))]
pub fn consume(&mut self, seq: String, skip_bad_kmers: bool) -> PyResult<u64> {
pub fn consume(&mut self, seq: &str, skip_bad_kmers: bool) -> PyResult<u64> {
// Incoming seq len
let new_len = seq.len();
// Init tally for consumed kmers
let mut n = 0;
// If store_kmers is true, then count & log hash:kmer pairs
if self.store_kmers {
let hash_to_kmer = self.hash_to_kmer.as_mut().unwrap();

// Create an iterator for (canonical_kmer, hash) pairs
let mut iter = KmersAndHashesIter::new(seq, self.ksize as usize, skip_bad_kmers);
let iter = KmersAndHashesIter::new(seq, self.ksize as usize, skip_bad_kmers);

// Iterate over the k-mers and their hashes
while let Some(result) = iter.next() {
for result in iter {
match result {
Ok((kmer, hash)) => {
if hash != 0 {
// Insert hash:kmer pair into the hashmap
self.hash_to_kmer
.as_mut()
.unwrap()
.insert(hash, kmer.clone());
hash_to_kmer.insert(hash, kmer.clone());
// Increment the count for the hash
*self.counts.entry(hash).or_insert(0) += 1;
// Tally kmers added
Expand All @@ -591,7 +587,6 @@ impl KmerCountTable {
Ok(0) => continue,
Ok(x) => {
self.count_hash(x);
()
}
Err(_) => {
let msg = format!("bad k-mer encountered at position {}", n);
Expand Down Expand Up @@ -686,16 +681,16 @@ impl KmerCountTable {
#[pyo3(signature = (seq, skip_bad_kmers=true))]
pub fn kmers_and_hashes(
&self,
seq: String,
seq: &str,
skip_bad_kmers: bool,
) -> PyResult<Vec<(String, u64)>> {
let mut v: Vec<(String, u64)> = vec![];

// Create the iterator
let mut iter = KmersAndHashesIter::new(seq, self.ksize as usize, skip_bad_kmers);
let iter = KmersAndHashesIter::new(seq, self.ksize as usize, skip_bad_kmers);

// Collect the k-mers and their hashes
while let Some(result) = iter.next() {
for result in iter {
match result {
Ok((kmer, hash)) => v.push((kmer, hash)),
Err(e) => return Err(e),
Expand Down Expand Up @@ -880,7 +875,7 @@ pub struct KmersAndHashesIter {
}

impl KmersAndHashesIter {
pub fn new(seq: String, ksize: usize, skip_bad_kmers: bool) -> Self {
pub fn new(seq: &str, ksize: usize, skip_bad_kmers: bool) -> Self {
let seq = seq.to_ascii_uppercase(); // Ensure uppercase for uniformity
let seqb = seq.as_bytes().to_vec(); // Convert to bytes for hashing
let seqb_rc = revcomp(&seqb);
Expand All @@ -891,7 +886,7 @@ impl KmersAndHashesIter {
let end = seq.len() - ksize + 1; // Calculate the endpoint for k-mer extraction
let hasher = SeqToHashes::new(
&seqb,
ksize.into(),
ksize,
true, // Set force to true, bad kmers will emit hash=0 instead of killing process
false, // Other flags, e.g., reverse complement
HashFunctions::Murmur64Dna,
Expand Down

0 comments on commit 5e259bb

Please sign in to comment.