Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MRG: update tests for distance metrics to test symmetry, with a bit of paranoia thrown in #48

Merged
merged 2 commits into from
Sep 23, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions src/python/tests/test_metrics.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from math import isclose
from oxli import KmerCountTable
from scipy.spatial.distance import cosine
import numpy as np
import pytest
from scipy.spatial.distance import cosine

from oxli import KmerCountTable

# Cosine similarity tests

Expand Down Expand Up @@ -35,6 +36,7 @@ def test_cosine_similarity_identical_tables():
# Cosine similarity between identical tables should be 1.0
# Allow a relative tolerance of 1e-5 (i.e. within 0.001% of the expected value)
assert isclose(kct1.cosine(kct2), 1.0, rel_tol=1e-5)
assert isclose(kct2.cosine(kct1), 1.0, rel_tol=1e-5)

# Using scipy to calculate the expected value
vector1 = [5, 3, 1, 4, 6]
Expand All @@ -43,6 +45,7 @@ def test_cosine_similarity_identical_tables():

# Allow a relative tolerance of 1e-5 (i.e. within 0.001% of the expected value)
assert isclose(kct1.cosine(kct2), expected_cosine_sim, rel_tol=1e-5)
assert isclose(kct2.cosine(kct1), expected_cosine_sim, rel_tol=1e-5)


def test_cosine_similarity_different_tables():
Expand Down Expand Up @@ -77,6 +80,7 @@ def test_cosine_similarity_different_tables():

# Allow a relative tolerance of 1e-5 (i.e. within 0.001% of the expected value)
assert isclose(kct1.cosine(kct2), expected_cosine_sim, rel_tol=1e-5)
assert isclose(kct2.cosine(kct1), expected_cosine_sim, rel_tol=1e-5)


def test_cosine_similarity_empty_table():
Expand Down Expand Up @@ -111,6 +115,7 @@ def test_cosine_similarity_empty_table():
expected_cosine_sim = 1 - cosine(vector1, vector2)

assert isclose(kct1.cosine(kct2), expected_cosine_sim, rel_tol=1e-5)
assert isclose(kct2.cosine(kct1), expected_cosine_sim, rel_tol=1e-5)


def test_cosine_similarity_both_empty():
Expand All @@ -123,6 +128,7 @@ def test_cosine_similarity_both_empty():

# Cosine similarity should be 0.0 for two empty tables
assert kct1.cosine(kct2) == 0.0
assert kct2.cosine(kct1) == 0.0


def test_cosine_similarity_partial_overlap():
Expand All @@ -136,7 +142,7 @@ def test_cosine_similarity_partial_overlap():
kct2 = KmerCountTable(ksize=4)

# Manually set k-mer counts for kct1
kct1["AAAA"] = 0 # Not in kct2
# kct1["AAAA"] = 0 # Not in kct2
kct1["AATT"] = 3
kct1["GGGG"] = 1
kct1["CCAA"] = 4
Expand All @@ -157,11 +163,10 @@ def test_cosine_similarity_partial_overlap():

# Cosine similarity is expected to be > 0 but < 1
assert isclose(kct1.cosine(kct2), expected_cosine_sim, rel_tol=1e-5)
assert isclose(kct2.cosine(kct1), expected_cosine_sim, rel_tol=1e-5)


# Jaccard coefficient similarity tests
import pytest
from oxli import KmerCountTable


def test_jaccard_similarity_identical_tables():
Expand All @@ -186,6 +191,7 @@ def test_jaccard_similarity_identical_tables():

# Jaccard similarity should be 1.0 for identical sets
assert kct1.jaccard(kct2) == 1.0
assert kct2.jaccard(kct1) == 1.0


def test_jaccard_similarity_different_tables():
Expand All @@ -206,6 +212,7 @@ def test_jaccard_similarity_different_tables():

# Expected result: 0 overlap between the sets
assert kct1.jaccard(kct2) == 0.0
assert kct2.jaccard(kct1) == 0.0


def test_jaccard_similarity_partial_overlap():
Expand All @@ -229,6 +236,7 @@ def test_jaccard_similarity_partial_overlap():

# Calculate expected Jaccard similarity: intersection {AAAA, AATT}, union {AAAA, TTTT, AATT, GGGG}
assert kct1.jaccard(kct2) == 2 / 4
assert kct2.jaccard(kct1) == 2 / 4


def test_jaccard_similarity_empty_table():
Expand All @@ -246,6 +254,7 @@ def test_jaccard_similarity_empty_table():

# kct2 is empty, so the Jaccard similarity is expected to be 0.0 in both directions
assert kct1.jaccard(kct2) == 0.0
assert kct2.jaccard(kct1) == 0.0


def test_jaccard_similarity_both_empty():
Expand All @@ -259,3 +268,4 @@ def test_jaccard_similarity_both_empty():

# Both tables are empty
assert kct1.jaccard(kct2) == 1.0
assert kct2.jaccard(kct1) == 1.0