Skip to content

Commit

Permalink
basic stuff is working
Browse files Browse the repository at this point in the history
  • Loading branch information
ctb committed Aug 31, 2024
1 parent 7a05d6a commit 8ef3a86
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 27 deletions.
8 changes: 8 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ name = "oxli"
crate-type = ["cdylib"]

[dependencies]
pyo3 = "0.19.0"
pyo3 = { version="0.19.0", features = ["extension-module", "anyhow"] }
sourmash = "0.15.1"
anyhow = "1.0.86"
97 changes: 72 additions & 25 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,60 +1,107 @@
use pyo3::prelude::*;
use pyo3::exceptions::PyValueError;
// use rayon::prelude::*;

use anyhow::{Result, Error, anyhow};
use std::collections::HashMap;

use sourmash::sketch::nodegraph::Nodegraph;
// use sourmash::sketch::nodegraph::Nodegraph;
use sourmash::_hash_murmur;
use sourmash::signature::SeqToHashes;
use sourmash::encodings::HashFunctions;


/// Table of k-mer occurrence counts, keyed by the 64-bit hash of each k-mer.
#[pyclass]
struct KmerCountTable {
// NOTE(review): `counts` is declared twice below with different value types
// (`usize` and `u64`). A struct cannot have two fields with the same name;
// this looks like an old/new pair left over from a diff. Confirm which one
// is current and drop the other.
counts: HashMap<u64, usize>,
counts: HashMap<u64, u64>,
// Length that every k-mer passed to `count`/`get` is checked against;
// set once by the constructor.
pub ksize: u8,
}

#[pymethods]
impl KmerCountTable {
// Constructor exposed to Python via `#[new]`: builds a table with no counts.
// NOTE(review): two signatures/bodies appear below (a no-argument one and a
// `ksize`-taking one) sharing a single closing brace; this looks like an
// old/new pair left over from a diff. Confirm which one is current.
#[new]
pub fn new() -> Self {
Self { counts: HashMap::new() }
// Variant that also records the k-mer size the table will accept.
pub fn new(ksize: u8) -> Self {
Self { counts: HashMap::new(), ksize }
}

pub fn count(&mut self, kmer: String) -> PyResult<usize> {
let hashval = _hash_murmur(kmer.as_bytes(), 42);
/// Hash a single k-mer with the table's DNA hash function (seed 42).
///
/// The k-mer is fed through `SeqToHashes` configured with
/// `HashFunctions::Murmur64Dna` and this table's `ksize`, and the first
/// value the iterator yields is returned.
///
/// # Errors
/// Returns an error if `kmer` is not exactly `ksize` bytes long, if the
/// iterator yields nothing, or if the iterator reports a hashing failure.
fn hash_kmer(&self, kmer: String) -> Result<u64> {
    // Compare as `usize`: `kmer.len() as u8` truncates, which would let a
    // k-mer of length `ksize + 256` slip past the check.
    if kmer.len() != usize::from(self.ksize) {
        return Err(anyhow!("wrong ksize"));
    }

    // `mut` is required because `Iterator::next` takes `&mut self`.
    let mut hashes = SeqToHashes::new(
        kmer.as_bytes(),
        self.ksize.into(),
        false,
        false,
        HashFunctions::Murmur64Dna,
        42,
    );

    // First `?`: an empty iterator becomes an error instead of a panic.
    // Second `?`: propagate a hashing failure reported by the iterator.
    let hashval = hashes
        .next()
        .ok_or_else(|| anyhow!("no hash produced for k-mer"))??;
    Ok(hashval)
}

let mut count: usize = 1;

/// Increment the count stored for `hashval` and return the updated count.
/// A hash that is not yet in the table is inserted with a count of 1.
pub fn count_hash(&mut self, hashval: u64) -> u64 {
let mut count: u64 = 1;
if self.counts.contains_key(&hashval) {
// Already present: read the stored value and bump it by one.
count = *self.counts.get(&hashval).unwrap();
count = count + 1;
}
self.counts.insert(hashval, count);

// NOTE(review): `Ok(count)` does not match the declared `u64` return type
// and is immediately followed by a bare `count`; it looks like a leftover
// from an older `PyResult`-returning version. Confirm and remove.
Ok(count)
count
}

pub fn get(&self, kmer: String) -> PyResult<usize> {
let hashval = _hash_murmur(kmer.as_bytes(), 42);
/// Record one occurrence of `kmer` and return its updated count.
///
/// # Errors
/// Raises `ValueError` if `kmer` is not exactly `ksize` bytes long, or if
/// the k-mer cannot be hashed.
pub fn count(&mut self, kmer: String) -> PyResult<u64> {
    // Compare as `usize`; casting the length to `u8` would truncate and
    // accept k-mers whose length is off by a multiple of 256.
    if kmer.len() != usize::from(self.ksize) {
        return Err(PyValueError::new_err(
            "kmer size does not match count table ksize",
        ));
    }

    // Hash through `hash_kmer`, the same path `get` uses. Hashing the raw
    // bytes with `_hash_murmur` here could yield a different key from the
    // one `get` looks up, so a k-mer that was just counted might read back
    // as 0.
    let hashval = self
        .hash_kmer(kmer)
        .map_err(|e| PyValueError::new_err(e.to_string()))?;
    Ok(self.count_hash(hashval))
}

let count = match self.counts.get(&hashval) {
Some(count) => count,
None => &(0 as usize)
};
Ok(*count)
/// Return the count recorded for `kmer`, or 0 if it has not been counted.
///
/// # Errors
/// Raises `ValueError` if `kmer` is not exactly `ksize` bytes long, or if
/// the k-mer cannot be hashed.
pub fn get(&self, kmer: String) -> PyResult<u64> {
    // Compare as `usize`; casting the length to `u8` would truncate and
    // accept k-mers whose length is off by a multiple of 256.
    if kmer.len() != usize::from(self.ksize) {
        return Err(PyValueError::new_err(
            "kmer size does not match count table ksize",
        ));
    }

    // Report a hashing failure as `ValueError` rather than panicking via
    // `unwrap()`.
    let hashval = self
        .hash_kmer(kmer)
        .map_err(|e| PyValueError::new_err(e.to_string()))?;

    Ok(self.counts.get(&hashval).copied().unwrap_or(0))
}
}

/// Formats the sum of two numbers as string.
#[pyfunction]
fn sum_as_string(a: String) -> PyResult<usize> {
let mut ng: Nodegraph = Nodegraph::with_tables(23, 6, 3);
// Consume this DNA string. Return number of k-mers consumed.
pub fn consume(&mut self, seq: String) -> PyResult<u64> {
// Iterator over the hashes of `seq`, built with this table's `ksize`,
// `HashFunctions::Murmur64Dna` and seed 42 (same arguments as `hash_kmer`).
let hashes = SeqToHashes::new(seq.as_bytes(),
self.ksize.into(),
false,
false,
HashFunctions::Murmur64Dna,
42);

// NOTE(review): the next three lines use `a` and `ng`, neither of which is
// defined in this function; they look like leftovers from the
// `sum_as_string` helper. Confirm and delete.
let hashval = _hash_murmur(a.as_bytes(), 42);
ng.count(hashval);
Ok(ng.get(hashval))
let mut n = 0;
for hash_value in hashes {
match hash_value {
// A hash value of 0 is skipped: it is neither stored nor added to `n`.
Ok(0) => continue,
Ok(x) => { self.count_hash(x); () }
// NOTE(review): errors are dropped here, yet `n` is still incremented
// below, so failed k-mers are included in the returned total. Confirm
// this is intended rather than an error that should be raised.
Err(err) => (),
}
n += 1;
}

Ok(n)
}
}

/// A Python module implemented in Rust.
///
/// Module initializer for `oxli`: registers the items below on `m`.
#[pymodule]
fn oxli(_py: Python, m: &PyModule) -> PyResult<()> {
// Register the `sum_as_string` function.
m.add_function(wrap_pyfunction!(sum_as_string, m)?)?;
// Register the `KmerCountTable` class.
m.add_class::<KmerCountTable>()?;
Ok(())
}
32 changes: 31 additions & 1 deletion src/python/tests/test_basic.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,39 @@
import pytest
import oxli

# Counting a 4-mer once moves its reported count from 0 to 1.
def test_simple():
# NOTE(review): `cg` is assigned twice; the no-argument call does not match
# the `new(ksize: u8)` constructor and looks like an old line left over
# from a diff. Confirm and remove.
cg = oxli.KmerCountTable()
cg = oxli.KmerCountTable(4)
kmer = "ATCG"

# Unseen k-mer reads as 0; `count` returns the updated count.
assert cg.get(kmer) == 0
assert cg.count(kmer) == 1
assert cg.get(kmer) == 1


def test_wrong_ksize():
    """A 4-mer given to a k=3 table is rejected by both `count` and `get`."""
    table = oxli.KmerCountTable(3)
    four_mer = "ATCG"

    # Same order as before: `count` first, then `get`.
    for method in (table.count, table.get):
        with pytest.raises(ValueError):
            method(four_mer)


def test_consume():
    """Consuming a sequence exactly one k-mer long counts that k-mer once."""
    table = oxli.KmerCountTable(4)
    sequence = "ATCG"

    consumed = table.consume(sequence)
    assert consumed == 1

    stored = table.get("ATCG")
    assert stored == 1


def test_consume_2():
    """A 5-base sequence yields two 4-mers, each retrievable on either strand."""
    table = oxli.KmerCountTable(4)

    assert table.consume("ATCGG") == 2

    expected_counts = (
        ("ATCG", 1),
        ("TCGG", 1),
        ("CCGA", 1),  # reverse complement of TCGG
    )
    for kmer, expected in expected_counts:
        assert table.get(kmer) == expected

0 comments on commit 8ef3a86

Please sign in to comment.