-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
113 additions
and
27 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,60 +1,107 @@ | ||
use pyo3::prelude::*; | ||
use pyo3::exceptions::PyValueError; | ||
// use rayon::prelude::*; | ||
|
||
use anyhow::{Result, Error, anyhow}; | ||
use std::collections::HashMap; | ||
|
||
use sourmash::sketch::nodegraph::Nodegraph; | ||
// use sourmash::sketch::nodegraph::Nodegraph; | ||
use sourmash::_hash_murmur; | ||
use sourmash::signature::SeqToHashes; | ||
use sourmash::encodings::HashFunctions; | ||
|
||
|
||
#[pyclass] | ||
struct KmerCountTable { | ||
counts: HashMap<u64, usize>, | ||
counts: HashMap<u64, u64>, | ||
pub ksize: u8, | ||
} | ||
|
||
#[pymethods] | ||
impl KmerCountTable { | ||
#[new] | ||
pub fn new() -> Self { | ||
Self { counts: HashMap::new() } | ||
pub fn new(ksize: u8) -> Self { | ||
Self { counts: HashMap::new(), ksize } | ||
} | ||
|
||
pub fn count(&mut self, kmer: String) -> PyResult<usize> { | ||
let hashval = _hash_murmur(kmer.as_bytes(), 42); | ||
fn hash_kmer(&self, kmer: String) -> Result<u64> { | ||
if kmer.len() as u8 != self.ksize { | ||
Err(anyhow!("wrong ksize")) | ||
} else { | ||
// mut? | ||
let mut hashes = SeqToHashes::new(kmer.as_bytes(), | ||
self.ksize.into(), | ||
false, | ||
false, | ||
HashFunctions::Murmur64Dna, | ||
42); | ||
|
||
let mut hashval = hashes.next().unwrap(); | ||
Ok(hashval?) | ||
} | ||
} | ||
|
||
let mut count: usize = 1; | ||
|
||
pub fn count_hash(&mut self, hashval: u64) -> u64 { | ||
let mut count: u64 = 1; | ||
if self.counts.contains_key(&hashval) { | ||
count = *self.counts.get(&hashval).unwrap(); | ||
count = count + 1; | ||
} | ||
self.counts.insert(hashval, count); | ||
|
||
Ok(count) | ||
count | ||
} | ||
|
||
pub fn get(&self, kmer: String) -> PyResult<usize> { | ||
let hashval = _hash_murmur(kmer.as_bytes(), 42); | ||
pub fn count(&mut self, kmer: String) -> PyResult<u64> { | ||
if kmer.len() as u8 != self.ksize { | ||
Err(PyValueError::new_err("kmer size does not match count table ksize")) | ||
} else { | ||
let hashval = _hash_murmur(kmer.as_bytes(), 42); | ||
let count = self.count_hash(hashval); | ||
Ok(count) | ||
} | ||
} | ||
|
||
let count = match self.counts.get(&hashval) { | ||
Some(count) => count, | ||
None => &(0 as usize) | ||
}; | ||
Ok(*count) | ||
pub fn get(&self, kmer: String) -> PyResult<u64> { | ||
if kmer.len() as u8 != self.ksize { | ||
Err(PyValueError::new_err("kmer size does not match count table ksize")) | ||
} else { | ||
let hashval = self.hash_kmer(kmer).unwrap(); | ||
|
||
let count = match self.counts.get(&hashval) { | ||
Some(count) => count, | ||
None => &0 | ||
}; | ||
Ok(*count) | ||
} | ||
} | ||
} | ||
|
||
/// Formats the sum of two numbers as string. | ||
#[pyfunction] | ||
fn sum_as_string(a: String) -> PyResult<usize> { | ||
let mut ng: Nodegraph = Nodegraph::with_tables(23, 6, 3); | ||
// Consume this DNA string. Return number of k-mers consumed. | ||
pub fn consume(&mut self, seq: String) -> PyResult<u64> { | ||
let hashes = SeqToHashes::new(seq.as_bytes(), | ||
self.ksize.into(), | ||
false, | ||
false, | ||
HashFunctions::Murmur64Dna, | ||
42); | ||
|
||
let hashval = _hash_murmur(a.as_bytes(), 42); | ||
ng.count(hashval); | ||
Ok(ng.get(hashval)) | ||
let mut n = 0; | ||
for hash_value in hashes { | ||
match hash_value { | ||
Ok(0) => continue, | ||
Ok(x) => { self.count_hash(x); () } | ||
Err(err) => (), | ||
} | ||
n += 1; | ||
} | ||
|
||
Ok(n) | ||
} | ||
} | ||
|
||
/// A Python module implemented in Rust. | ||
#[pymodule] | ||
fn oxli(_py: Python, m: &PyModule) -> PyResult<()> { | ||
m.add_function(wrap_pyfunction!(sum_as_string, m)?)?; | ||
m.add_class::<KmerCountTable>()?; | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,39 @@ | ||
import pytest | ||
import oxli | ||
|
||
def test_simple(): | ||
cg = oxli.KmerCountTable() | ||
cg = oxli.KmerCountTable(4) | ||
kmer = "ATCG" | ||
|
||
assert cg.get(kmer) == 0 | ||
assert cg.count(kmer) == 1 | ||
assert cg.get(kmer) == 1 | ||
|
||
|
||
def test_wrong_ksize(): | ||
cg = oxli.KmerCountTable(3) | ||
kmer = "ATCG" | ||
|
||
with pytest.raises(ValueError): | ||
cg.count(kmer) | ||
|
||
with pytest.raises(ValueError): | ||
cg.get(kmer) | ||
|
||
|
||
def test_consume(): | ||
cg = oxli.KmerCountTable(4) | ||
kmer = "ATCG" | ||
|
||
assert cg.consume(kmer) == 1 | ||
assert cg.get("ATCG") == 1 | ||
|
||
|
||
def test_consume_2(): | ||
cg = oxli.KmerCountTable(4) | ||
seq = "ATCGG" | ||
|
||
assert cg.consume(seq) == 2 | ||
assert cg.get("ATCG") == 1 | ||
assert cg.get("TCGG") == 1 | ||
assert cg.get("CCGA") == 1 # reverse complement! |