Skip to content

Commit

Permalink
Add new dunder methods and attributes (#27)
Browse files Browse the repository at this point in the history
* Add toml package as testing dependency.

* Add version attribute to new KmerCountTable instances.

* Test version attribute.

* Track total bases consumed with attr consumed

* Add tests for consumed attr

* Add sum_counts attr to get total counts in table.

* Add tests for sum_counts attr

* Add __len__ method to get total unique kmer count.

* Add tests for __len__ method.

* Add __getitem__ and __setitem__ methods

* Tests for __getitem__ and __setitem__ dunder methods.

* Make KmerCountTable iterable

* Add tests for __iter__

* MRG: some suggested changes for new dunder methods (#34)

* Bump anyhow from 1.0.87 to 1.0.89 (#33)

Bumps [anyhow](https://github.com/dtolnay/anyhow) from 1.0.87 to 1.0.89.
- [Release notes](https://github.com/dtolnay/anyhow/releases)
- [Commits](dtolnay/anyhow@1.0.87...1.0.89)

---
updated-dependencies:
- dependency-name: anyhow
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* Bump pyo3 from 0.22.2 to 0.22.3 (#32)

Bumps [pyo3](https://github.com/pyo3/pyo3) from 0.22.2 to 0.22.3.
- [Release notes](https://github.com/pyo3/pyo3/releases)
- [Changelog](https://github.com/PyO3/pyo3/blob/v0.22.3/CHANGELOG.md)
- [Commits](PyO3/pyo3@v0.22.2...v0.22.3)

---
updated-dependencies:
- dependency-name: pyo3
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* pass thru errors; remove clone

* cargo fmt

---------

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

---------

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: C. Titus Brown <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Sep 18, 2024
1 parent 116d1e5 commit 373bef3
Show file tree
Hide file tree
Showing 5 changed files with 251 additions and 40 deletions.
6 changes: 3 additions & 3 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: oxli-dev
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- rust
- compilers
- maturin>=1,<2
- pytest
- compilers
- rust
- toml
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,11 @@ dynamic = ["version"]
[tool.maturin]
features = ["pyo3/extension-module"]

[project.optional-dependencies]
test = [
"pytest>=7.0",
"toml>=0.10"
]

[metadata]
license = { file = "LICENSE" }
69 changes: 61 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// Standard library imports
use std::collections::hash_map::IntoIter;
use std::collections::{HashMap, HashSet};

// External crate imports
Expand All @@ -9,10 +10,15 @@ use pyo3::prelude::*;
use sourmash::encodings::HashFunctions;
use sourmash::signature::SeqToHashes;

// Crate version string, read from the CARGO_PKG_VERSION environment variable
// at compile time; copied into every new KmerCountTable.
const VERSION: &str = env!("CARGO_PKG_VERSION");

// Table of k-mer counts keyed by k-mer hash, exposed to Python via #[pyclass].
#[pyclass]
struct KmerCountTable {
// Count stored for each k-mer hash value.
counts: HashMap<u64, u64>,
// k-mer size this table was created with; k-mers passed in are checked against it.
pub ksize: u8,
// Crate version captured from VERSION when the table was constructed.
version: String,
// Running total of sequence length handed to the counting methods; starts at 0.
consumed: u64,
}

#[pymethods]
Expand All @@ -23,6 +29,8 @@ impl KmerCountTable {
Self {
counts: HashMap::new(),
ksize,
version: VERSION.to_string(), // Initialize the version field
consumed: 0, // Initialize the total sequence length tracker
}
}

Expand Down Expand Up @@ -63,7 +71,8 @@ impl KmerCountTable {
"kmer size does not match count table ksize",
))
} else {
let hashval = self.hash_kmer(kmer).unwrap();
self.consumed += kmer.len() as u64;
let hashval = self.hash_kmer(kmer)?;
let count = self.count_hash(hashval);
Ok(count)
}
Expand Down Expand Up @@ -251,11 +260,23 @@ impl KmerCountTable {
self.counts.keys().cloned().collect()
}

// TODO: Getter for the version attribute
// Store oxli version when instance is created
// Read-only attribute: the oxli version string recorded when this table
// was created.
#[getter]
pub fn version(&self) -> &str {
    self.version.as_str()
}

// Attribute to access the total bases processed with count or consume.
#[getter]
pub fn consumed(&self) -> u64 {
self.consumed
}

// TODO: Getter for the consumed seq len attribute
// Update tracker when DNA is processed with count() or consume()
// Read-only attribute: the sum of every count currently stored in the table.
#[getter]
pub fn sum_counts(&self) -> u64 {
    self.counts.values().copied().sum()
}

// Consume this DNA string. Return number of k-mers consumed.
#[pyo3(signature = (seq, allow_bad_kmers=true))]
Expand Down Expand Up @@ -287,6 +308,9 @@ impl KmerCountTable {
n += 1;
}

// Update the total sequence consumed tracker
self.consumed += seq.len() as u64;

Ok(n)
}

Expand Down Expand Up @@ -338,15 +362,44 @@ impl KmerCountTable {
self.symmetric_difference(other)
}

// Python dunder method for __iter__

// Python dunder method for __next__
// Python __iter__: hand back an iterator over (hash, count) pairs.
// The iterator walks a clone of the map taken at call time, so it owns its
// data and does not borrow from the table.
pub fn __iter__(slf: PyRef<Self>) -> KmerCountTableIterator {
    let snapshot = slf.counts.clone();
    KmerCountTableIterator {
        inner: snapshot.into_iter(),
    }
}

// Python __len__: number of distinct hash keys held in the table.
fn __len__(&self) -> usize {
    self.counts.keys().len()
}

// Python dunder method for __getitem__
fn __getitem__(&self, kmer: String) -> PyResult<u64> {
self.get(kmer)
}

// Python __setitem__: `table[kmer] = count` hashes the k-mer and stores
// `count` under that hash, replacing any previous value. A hashing error is
// passed through unchanged and leaves the table untouched.
pub fn __setitem__(&mut self, kmer: String, count: u64) -> PyResult<()> {
    self.hash_kmer(kmer).map(|hashval| {
        self.counts.insert(hashval, count);
    })
}
}

// Iterator object handed to Python by KmerCountTable::__iter__.
#[pyclass]
pub struct KmerCountTableIterator {
// Owning iterator over (hash, count) pairs; it is built from a clone of the
// table's map, so it holds its own data rather than borrowing the table.
inner: IntoIter<u64, u64>,
}

#[pymethods]
impl KmerCountTableIterator {
pub fn __next__(mut slf: PyRefMut<Self>) -> Option<(u64, u64)> {
slf.inner.next()
}
}

// Python module definition
Expand Down
99 changes: 90 additions & 9 deletions src/python/tests/test_attr.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import oxli
import pytest
import toml

from pathlib import Path
from test_basic import create_sample_kmer_table


# Test attributes


def test_hashes_attribute():
table = create_sample_kmer_table(3, ["AAA", "TTT", "AAC"])
hashes = table.hashes
Expand All @@ -19,13 +24,89 @@ def test_hashes_attribute():
), ".hashes attribute should match the expected set of hash keys"


def test_version_attr():
    """Check version attribute matches current version."""
    # Placeholder: no assertions yet.
def get_version_from_cargo_toml():
    """Return the ``package.version`` value read from the project's Cargo.toml.

    The manifest is located relative to this test file (three directory
    levels above the directory that contains it).

    Raises:
        FileNotFoundError: if no Cargo.toml exists at that location.
    """
    cargo_toml_path = Path(__file__).resolve().parents[3] / "Cargo.toml"

    if cargo_toml_path.exists():
        with cargo_toml_path.open("r") as manifest:
            parsed = toml.load(manifest)
        return parsed["package"]["version"]

    raise FileNotFoundError(f"{cargo_toml_path} not found")


def test_kmer_count_table_version():
    """A new table's ``version`` attribute equals the version in Cargo.toml."""
    expected_version = get_version_from_cargo_toml()
    kmer_table = oxli.KmerCountTable(ksize=31)

    versions_match = kmer_table.version == expected_version
    assert (
        versions_match
    ), f"Expected version {expected_version}, but got {kmer_table.version}"


# Test consumed bases tracker
def test_initial_consumed():
    """A freshly constructed table reports zero consumed bases."""
    table = oxli.KmerCountTable(ksize=31)
    consumed = table.consumed
    assert consumed == 0, "Initial consumed should be 0"


def test_consumed_after_count():
    """Counting a single 16-base k-mer adds 16 to ``consumed``."""
    kmer = "ACGTACGTACGTACGT"  # 16 bases
    table = oxli.KmerCountTable(ksize=16)
    table.count(kmer)

    assert table.consumed == 16, "consumed should be updated to 16 after counting k-mer"


def test_consumed_after_consume():
    """Consuming a 16-base sequence adds its full length to ``consumed``."""
    sequence = "ACGTACGXACGTACGT"  # 16 characters, one of them not a valid base
    table = oxli.KmerCountTable(ksize=16)
    table.consume(sequence, allow_bad_kmers=True)

    assert (
        table.consumed == 16
    ), "consumed should be updated to 16 after consuming sequence"


def test_consumed_after_multiple_operations():
    """``consumed`` accumulates across count() and consume() calls."""
    table = oxli.KmerCountTable(ksize=16)

    # 16 bases via count().
    table.count("ACGTACGTACGTACGT")
    # 12 bases via consume(): shorter than ksize, so no k-mers are added,
    # but the sequence length is still expected to be tallied.
    table.consume("GCTAGCTAGCTA")

    total = table.consumed
    assert total == 28, "consumed should be updated to 28 after multiple operations"


# Test total counts attribute
def test_sum_counts_initial():
    """An empty table has a ``sum_counts`` of zero."""
    table = oxli.KmerCountTable(ksize=16)
    total = table.sum_counts
    assert total == 0, "Initial sum_counts should be 0"

def test_total_consumed_seq_len_attr():
    """Should log total seq len consumed."""
    # Placeholder. Cases still to cover:
    #   - individual k-mers
    #   - long sequences with multiple k-mers
    #   - whether invalid k-mers should be excluded (open question)

def test_sum_counts_after_count():
    """Counting one k-mer once brings ``sum_counts`` to 1."""
    table = oxli.KmerCountTable(ksize=16)
    table.count("ACGTACGTACGTACGT")

    total = table.sum_counts
    assert total == 1, "sum_counts should be updated to 1 after counting k-mer"


def test_sum_counts_after_consume():
    """A 17-base sequence at ksize=16 contributes two k-mers to ``sum_counts``."""
    sequence = "ACGTACGTACGTACGTA"  # 17 bases -> 2 overlapping 16-mers
    table = oxli.KmerCountTable(ksize=16)
    table.consume(sequence)

    total = table.sum_counts
    assert total == 2, "sum_counts should be updated after consuming sequence"


def test_sum_counts_after_multiple_operations():
    """``sum_counts`` adds up counts from count() and consume() together."""
    table = oxli.KmerCountTable(ksize=16)

    table.count("ACGTACGTACGTACGT")  # +1
    table.consume("ACGTACGTACGTACGTA")  # +2 (two 16-mers in 17 bases)

    total = table.sum_counts
    assert total == 3, "sum_counts should be updated after multiple operations"
111 changes: 91 additions & 20 deletions src/python/tests/test_dunders.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,94 @@
from test_basic import create_sample_kmer_table


def test_len_dunder_method():
    """__len__ should return number of keys in KmerCountTable."""
    # Placeholder: no assertions yet.

def test_iter_dunder_method():
    """KmerCountTable should be iterable, yielding hash:count pairs."""
    # Placeholder: no assertions yet.

def test_next_dunder_method():
    """Select the next key in the generator."""
    # Placeholder: no assertions yet.

def test_getitem_dunder_method():
    """Query an object using the indexing syntax (obj[key])."""
    # Placeholder: intended to show the same behaviour as .get().

def test_setitem_dunder_method():
    """Set values using the indexing syntax (obj[key] = value)."""
    # Placeholder: no assertions yet.
# Test __len__
def test_len_initial():
    """len() of a new, empty table is zero."""
    table = oxli.KmerCountTable(ksize=16)
    size = len(table)
    assert size == 0, "Initial length should be 0"


def test_len_after_count():
    """Counting one k-mer makes the table length 1."""
    table = oxli.KmerCountTable(ksize=16)
    table.count("ACGTACGTACGTACGT")  # first and only unique k-mer

    size = len(table)
    assert size == 1, "Length should be 1 after adding one unique k-mer"


def test_len_after_multiple_counts():
    """len() counts unique k-mers only; repeats and too-short input add nothing."""
    table = oxli.KmerCountTable(ksize=16)

    table.count("ACGTACGTACGTACGT")  # unique k-mer #1
    table.count("ACGTACGTACGTACGT")  # repeat of #1, no new key
    table.count("CCCCCCCCCCCCCCCC")  # unique k-mer #2
    table.consume("GCTAGCTAGCTA")  # 12 bases, shorter than ksize: no k-mers

    size = len(table)
    assert size == 2, "Length should be 2 after adding two unique k-mers"


# Test iter methods


def test_iterable():
    """Iterating a table yields one (hash, count) tuple per distinct hash."""
    kmer_table = create_sample_kmer_table(3, ["AAA", "TTT", "AAC"])
    hash_aaa = kmer_table.hash_kmer("AAA")  # 10679328328772601858
    hash_ttt = kmer_table.hash_kmer("TTT")  # 10679328328772601858
    hash_aac = kmer_table.hash_kmer("AAC")  # 6579496673972597301

    # Collect items from the iterator
    items = list(kmer_table)
    keys = [key for key, _ in items]
    counts = [count for _, count in items]

    # Check if the items contain the expected tuples
    assert 2 in counts, "Counts should be present in the iterated items"
    # Pin the literal hash value so a change in hashing is noticed.
    assert (
        6579496673972597301 in keys
    ), "keys should be present in the iterated items"
    # Every k-mer that was inserted must be reachable through its own hash.
    for expected_hash in (hash_aaa, hash_ttt, hash_aac):
        assert (
            expected_hash in keys
        ), "keys should be present in the iterated items"
    assert len(items) == 2, "There should be 2 k-mers in the table"


def test_iter_empty():
    """Iterating an empty table yields nothing."""
    table = oxli.KmerCountTable(ksize=16)

    # Exhaust the iterator into a list so the result can be compared directly.
    collected = list(table)

    assert collected == [], "Iterator should be empty for an empty KmerCountTable"


# Test __setitem__ and __getitem__


def test_setitem():
    """Set values using the indexing syntax (obj[key] = value)."""
    kmer = "ACGTACGTACGTACGT"
    table = oxli.KmerCountTable(ksize=16)

    table[kmer] = 5  # assign the count directly, without calling count()

    stored = table[kmer]
    assert stored == 5, "Value should be 5 after setting with __setitem__"


def test_getitem():
    """Query an object using the indexing syntax (obj[key])."""
    kmer = "ACGTACGTACGTACGT"
    table = oxli.KmerCountTable(ksize=16)
    table[kmer] = 5

    assert table[kmer] == 5, "Value should be 5 after setting with __setitem__"
    # Indexing and .get() are expected to agree for the same k-mer.
    assert table[kmer] == table.get(kmer), "Behaviour should be same as .get()"

    # A k-mer that was never stored is expected to read back as zero.
    missing = table["CCCCCCCCCCCCCCCC"]
    assert missing == 0, "Default value for non-existent k-mer should be 0"


def test_setitem_update():
    """Item assignment overwrites an existing count, however it was created."""
    kmer = "ACGTACGTACGTACGT"
    table = oxli.KmerCountTable(ksize=16)

    table.count(kmer)  # count is now 1
    table[kmer] = 5  # overwrite the counted value
    assert table.get(kmer) == 5

    table[kmer] = 10  # overwrite again
    current = table[kmer]
    assert current == 10, "Value should be updated to 10 after setting with __setitem__"

0 comments on commit 373bef3

Please sign in to comment.