From b85caf2bed319b5fa354495a86dca76910a9cb1b Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 10 Nov 2024 11:07:06 -0800 Subject: [PATCH] some scaled stuff --- src/python/tests/test_manysearch.py | 30 ++++++++++++++++++++++++++++ src/python/tests/test_multisearch.py | 6 ++++-- src/python/tests/test_pairwise.py | 22 ++++++++++++++++++++ 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/src/python/tests/test_manysearch.py b/src/python/tests/test_manysearch.py index f9283dfd..a91675ea 100644 --- a/src/python/tests/test_manysearch.py +++ b/src/python/tests/test_manysearch.py @@ -379,6 +379,36 @@ def test_simple_threshold(runtmp, indexed, zip_query): assert len(df) == 3 +def test_simple_scaled(runtmp, indexed, zip_query): + # test with a different scaled + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") + + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") + + make_file_list(query_list, [sig2, sig47, sig63]) + make_file_list(against_list, [sig2, sig47, sig63]) + + if indexed: + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) + + if zip_query: + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) + + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", "manysearch", query_list, against_list, "-o", output, "-s", "10_000" + ) + assert os.path.exists(output) + + df = pandas.read_csv(output) + assert len(df) == 3 + assert set(list(df['scaled'])) == {10000} + + def test_simple_manifest(runtmp, indexed): # test with a simple threshold => only 3 results query_list = runtmp.output("query.txt") diff --git a/src/python/tests/test_multisearch.py b/src/python/tests/test_multisearch.py index ef018a96..123b3872 100644 --- a/src/python/tests/test_multisearch.py +++ b/src/python/tests/test_multisearch.py @@ -1195,9 +1195,11 @@ def test_mismatched_scaled_query(runtmp): output = runtmp.output("out.csv") - runtmp.sourmash("scripts", "multisearch", query_list, against_list, "-o", output) + runtmp.sourmash("scripts", "multisearch", query_list, against_list, "-o", output, '-s', '10_000') assert os.path.exists(output) - + df = pandas.read_csv(output) + assert len(df) == 5 + assert set(list(df['scaled'])) == {10_000} def test_mismatched_scaled_against(runtmp): # test what happens if against scaled is too high diff --git a/src/python/tests/test_pairwise.py b/src/python/tests/test_pairwise.py index bd54c5cd..ce9d94f7 100644 --- a/src/python/tests/test_pairwise.py +++ b/src/python/tests/test_pairwise.py @@ -728,3 +728,25 @@ def test_simple_below_threshold_write_all_no_ani(runtmp): assert float(row["jaccard"]) == 1.0 assert row["query_name"] == row["match_name"] assert row["query_md5"] == row["match_md5"] + + +def test_simple_scaled(runtmp): + # test basic execution w/scaled! + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") + + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") + + make_file_list(query_list, [sig2, sig47, sig63]) + + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", "pairwise", query_list, "-o", output, "-s", "10_000" + ) + assert os.path.exists(output) + df = pandas.read_csv(output) + assert len(df) == 1 + assert set(list(df['scaled'])) == {10_000}