Skip to content

Commit

Permalink
Merge branch 'main' of github.com:dib-lab/oxli into add_and_substract
Browse files Browse the repository at this point in the history
  • Loading branch information
ctb committed Oct 29, 2024
2 parents 904d4ef + 5f27d68 commit 2d0cc85
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 51 deletions.
72 changes: 36 additions & 36 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ name = "oxli"
crate-type = ["cdylib"]

[dependencies]
anyhow = "1.0.89"
anyhow = "1.0.91"
env_logger = "0.11.5"
log = "0.4.22"
niffler = "2.6.0"
pyo3 = { version="0.22.4", features = ["extension-module", "anyhow"] }
pyo3 = { version="0.22.5", features = ["extension-module", "anyhow"] }
rayon = "1.10.0"
serde = { version = "1.0.210", features = ["derive"] }
serde_json = "1.0.128"
sourmash = "0.15.2"
serde = { version = "1.0.213", features = ["derive"] }
serde_json = "1.0.132"
sourmash = "0.16.0"
20 changes: 10 additions & 10 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -871,7 +871,7 @@ impl KmerCountTableIterator {

pub struct KmersAndHashesIter {
seq: String, // The sequence to iterate over
seqb: Vec<u8>, // Sequence bytes
seq_rc: String, // reverse complement sequence
ksize: usize, // K-mer size
pos: usize, // Current position in the sequence
end: usize, // The end position for k-mer extraction
Expand All @@ -883,8 +883,12 @@ impl KmersAndHashesIter {
pub fn new(seq: String, ksize: usize, skip_bad_kmers: bool) -> Self {
let seq = seq.to_ascii_uppercase(); // Ensure uppercase for uniformity
let seqb = seq.as_bytes().to_vec(); // Convert to bytes for hashing
let end = seq.len() - ksize + 1; // Calculate the endpoint for k-mer extraction
let seqb_rc = revcomp(&seqb);
let seq_rc = std::str::from_utf8(&seqb_rc)
.expect("invalid utf-8 sequence for rev comp")
.to_string();

let end = seq.len() - ksize + 1; // Calculate the endpoint for k-mer extraction
let hasher = SeqToHashes::new(
&seqb,
ksize.into(),
Expand All @@ -896,7 +900,7 @@ impl KmersAndHashesIter {

Self {
seq,
seqb,
seq_rc,
ksize,
pos: 0, // Start at the beginning of the sequence
end,
Expand All @@ -917,15 +921,11 @@ impl Iterator for KmersAndHashesIter {

let start = self.pos;
let ksize = self.ksize;
let rpos = self.end - start - 1;

// Extract the current k-mer and its reverse complement
let substr = &self.seq[start..start + ksize];
// CTB: this calculates RC each time, instead of doing so
// using a sliding window. It's easy and works, so I'm
// starting here :).
let substr_b_rc = revcomp(&self.seqb[start..start + ksize]);
let substr_rc =
std::str::from_utf8(&substr_b_rc).expect("invalid utf-8 sequence for rev comp");
let substr_rc = &self.seq_rc[rpos..rpos + ksize];

// Get the next hash value from the hasher
let hashval = self.hasher.next().expect("should not run out of hashes");
Expand All @@ -943,7 +943,7 @@ impl Iterator for KmersAndHashesIter {
} else {
substr_rc
};
// If vaild hash, return (canonical_kmer,hashval) tuple
// If valid hash, return (canonical_kmer,hashval) tuple
Some(Ok((canonical_kmer.to_string(), hashval)))
} else {
// If the hash is 0, handle based on `skip_bad_kmers`
Expand Down

0 comments on commit 2d0cc85

Please sign in to comment.