diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml new file mode 100644 index 00000000..f29d30a2 --- /dev/null +++ b/.github/workflows/black.yml @@ -0,0 +1,13 @@ +name: Lint + +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: psf/black@stable + with: + src: "./src/python" + options: "--check --verbose" \ No newline at end of file diff --git a/src/python/sourmash_plugin_branchwater/__init__.py b/src/python/sourmash_plugin_branchwater/__init__.py index 5c2f5d37..346277e1 100755 --- a/src/python/sourmash_plugin_branchwater/__init__.py +++ b/src/python/sourmash_plugin_branchwater/__init__.py @@ -456,12 +456,15 @@ def __init__(self, p): p.add_argument( "-a", "--ani", action="store_true", help="estimate ANI from containment" ) +<<<<<<< HEAD p.add_argument( "-p", "--prob", action="store_true", help="estimate probability of overlap for significance ranking of search results", ) +======= +>>>>>>> 31d3056 (Python Black Formatting (#492)) def main(self, args): print_version() @@ -474,9 +477,12 @@ def main(self, args): notify( f"searching all sketches in '{args.query_paths}' against '{args.against_paths}' using {num_threads} threads" ) +<<<<<<< HEAD notify( f"estimate ani? {args.ani} / estimate probability of overlap? {args.prob}" ) +======= +>>>>>>> 31d3056 (Python Black Formatting (#492)) super().main(args) status = sourmash_plugin_branchwater.do_multisearch( @@ -487,7 +493,10 @@ def main(self, args): args.scaled, args.moltype, args.ani, +<<<<<<< HEAD args.prob, +======= +>>>>>>> 31d3056 (Python Black Formatting (#492)) args.output, ) if status == 0: diff --git a/src/python/tests/test_multisearch.py b/src/python/tests/test_multisearch.py index 40620bc9..56684325 100644 --- a/src/python/tests/test_multisearch.py +++ b/src/python/tests/test_multisearch.py @@ -11,10 +11,13 @@ zip_siglist, index_siglist, ) +<<<<<<< HEAD def float_round(string: str, ndigits=None): return round(float(string), ndigits) +======= +>>>>>>> 31d3056 (Python Black Formatting (#492)) def test_installed(runtmp): @@ -61,9 +64,12 @@ def test_simple_no_ani(runtmp, zip_query, zip_db): assert float(row["containment"] == 1.0) assert float(row["jaccard"] == 1.0) assert float(row["max_containment"] == 1.0) +<<<<<<< HEAD # assert float(row['prob_overlap'] == 1.0) # assert float(row['prob_overlap_adjusted'] == 1.0) # assert float(row['containment_adjusted'] == 1.0) +======= +>>>>>>> 31d3056 (Python Black Formatting (#492)) assert "query_containment_ani" not in row assert "match_containment_ani" not in row assert "average_containment_ani" not in row @@ -73,9 +79,15 @@ def test_simple_no_ani(runtmp, zip_query, zip_db): # confirm hand-checked numbers q = row["query_name"].split()[0] m = row["match_name"].split()[0] +<<<<<<< HEAD cont = float_round(row["containment"], 4) jaccard = float_round(row["jaccard"], 4) maxcont = float_round(row["max_containment"], 4) +======= + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) +>>>>>>> 31d3056 (Python Black Formatting (#492)) intersect_hashes = int(row["intersect_hashes"]) print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}") @@ -109,6 +121,10 @@ def test_simple_prob_overlap(runtmp, zip_query, zip_db, indexed_query, indexed_a if zip_db: against_list = zip_siglist(runtmp, against_list, runtmp.output("db.zip")) +<<<<<<< HEAD +======= + +>>>>>>> 31d3056 (Python Black Formatting (#492)) if zip_query: query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) @@ -119,7 +135,11 @@ def test_simple_prob_overlap(runtmp, zip_query, zip_db, indexed_query, indexed_a against_list = index_siglist(runtmp, against_list, runtmp.output("db")) runtmp.sourmash( +<<<<<<< HEAD "scripts", "multisearch", query_list, against_list, "-o", output, "--prob" +======= + "scripts", "multisearch", query_list, against_list, "-o", output, "--ani" +>>>>>>> 31d3056 (Python Black Formatting (#492)) ) assert os.path.exists(output) @@ -136,6 +156,7 @@ def test_simple_prob_overlap(runtmp, zip_query, zip_db, indexed_query, indexed_a assert float(row["containment"] == 1.0) assert float(row["jaccard"] == 1.0) assert float(row["max_containment"] == 1.0) +<<<<<<< HEAD assert "query_containment_ani" not in row assert "match_containment_ani" not in row assert "average_containment_ani" not in row @@ -147,11 +168,18 @@ def test_simple_prob_overlap(runtmp, zip_query, zip_db, indexed_query, indexed_a assert float_round(row["containment_adjusted"], 4) == 2377.5947 assert float_round(row["containment_adjusted_log10"], 4) == 2377.5947 assert float_round(row["tf_idf_score"], 4) == 1.4974 +======= + assert float(row["query_containment_ani"] == 1.0) + assert float(row["match_containment_ani"] == 1.0) + assert float(row["average_containment_ani"] == 1.0) + assert float(row["max_containment_ani"] == 1.0) +>>>>>>> 31d3056 (Python Black Formatting (#492)) else: # confirm hand-checked numbers q = row["query_name"].split()[0] m = row["match_name"].split()[0] +<<<<<<< HEAD cont = float_round(row["containment"], 4) jaccard = float_round(row["jaccard"], 4) maxcont = float_round(row["max_containment"], 4) @@ -264,6 +292,36 @@ def test_simple_ani(runtmp, zip_query, zip_db, indexed_query, indexed_against): f"{max_ani:.04}", ) +======= + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + q1_ani = float(row["query_containment_ani"]) + q2_ani = float(row["match_containment_ani"]) + avg_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) + + jaccard = round(jaccard, 4) + cont = round(cont, 4) + maxcont = round(maxcont, 4) + q1_ani = round(q1_ani, 4) + q2_ani = round(q2_ani, 4) + avg_ani = round(avg_ani, 4) + max_ani = round(max_ani, 4) + print( + q, + m, + f"{jaccard:.04}", + f"{cont:.04}", + f"{maxcont:.04}", + f"{q1_ani:.04}", + f"{q2_ani:.04}", + f"{avg_ani:.04}", + f"{max_ani:.04}", + ) + +>>>>>>> 31d3056 (Python Black Formatting (#492)) if q == "NC_011665.1" and m == "NC_009661.1": assert jaccard == 0.3207 assert cont == 0.4828 @@ -994,6 +1052,7 @@ def test_simple_prot(runtmp): # confirm hand-checked numbers q = row["query_name"].split()[0] m = row["match_name"].split()[0] +<<<<<<< HEAD cont = float_round(row["containment"], 4) jaccard = float_round(row["jaccard"], 4) maxcont = float_round(row["max_containment"], 4) @@ -1003,6 +1062,24 @@ def test_simple_prot(runtmp): avg_ani = float_round(row["average_containment_ani"], 4) max_ani = float_round(row["max_containment_ani"], 4) +======= + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + q1_ani = float(row["query_containment_ani"]) + q2_ani = float(row["match_containment_ani"]) + avg_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) + + jaccard = round(jaccard, 4) + cont = round(cont, 4) + maxcont = round(maxcont, 4) + q1_ani = round(q1_ani, 4) + q2_ani = round(q2_ani, 4) + avg_ani = round(avg_ani, 4) + max_ani = round(max_ani, 4) +>>>>>>> 31d3056 (Python Black Formatting (#492)) print( q, m, @@ -1209,6 +1286,7 @@ def test_simple_dayhoff(runtmp): # confirm hand-checked numbers q = row["query_name"].split()[0] m = row["match_name"].split()[0] +<<<<<<< HEAD cont = float_round(row["containment"], 4) jaccard = float_round(row["jaccard"], 4) maxcont = float_round(row["max_containment"], 4) @@ -1218,6 +1296,24 @@ def test_simple_dayhoff(runtmp): avg_ani = float_round(row["average_containment_ani"], 4) max_ani = float_round(row["max_containment_ani"], 4) +======= + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + q1_ani = float(row["query_containment_ani"]) + q2_ani = float(row["match_containment_ani"]) + avg_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) + + jaccard = round(jaccard, 4) + cont = round(cont, 4) + maxcont = round(maxcont, 4) + q1_ani = round(q1_ani, 4) + q2_ani = round(q2_ani, 4) + avg_ani = round(avg_ani, 4) + max_ani = round(max_ani, 4) +>>>>>>> 31d3056 (Python Black Formatting (#492)) print( q, m, @@ -1297,6 +1393,7 @@ def test_simple_hp(runtmp): # confirm hand-checked numbers q = row["query_name"].split()[0] m = row["match_name"].split()[0] +<<<<<<< HEAD cont = float_round(row["containment"], 4) jaccard = float_round(row["jaccard"], 4) maxcont = float_round(row["max_containment"], 4) @@ -1306,6 +1403,24 @@ def test_simple_hp(runtmp): avg_ani = float_round(row["average_containment_ani"], 4) max_ani = float_round(row["max_containment_ani"], 4) +======= + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + q1_ani = float(row["query_containment_ani"]) + q2_ani = float(row["match_containment_ani"]) + avg_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) + + jaccard = round(jaccard, 4) + cont = round(cont, 4) + maxcont = round(maxcont, 4) + q1_ani = round(q1_ani, 4) + q2_ani = round(q2_ani, 4) + avg_ani = round(avg_ani, 4) + max_ani = round(max_ani, 4) +>>>>>>> 31d3056 (Python Black Formatting (#492)) print( q, m, diff --git a/src/python/tests/test_sketch.py b/src/python/tests/test_sketch.py index 4d47545f..1e582fb3 100644 --- a/src/python/tests/test_sketch.py +++ b/src/python/tests/test_sketch.py @@ -961,7 +961,11 @@ def test_manysketch_prefix2(runtmp, capfd): dna_prefix = os.path.join( fa_path, "short" ) # need to avoid matching short-protein.fa +<<<<<<< HEAD prot_prefix = os.path.join(fa_path, "*protein.fa") +======= + prot_prefix = os.path.join(fa_path, "*protein") +>>>>>>> 31d3056 (Python Black Formatting (#492)) zip_exclude = os.path.join(fa_path, "*zip") # make prefix input file @@ -1042,7 +1046,10 @@ def test_manysketch_prefix2(runtmp, capfd): for sig in sigs: assert sig.name in expected_signames if sig.name == "short": +<<<<<<< HEAD # minhash is not defined? How does this test work? +======= +>>>>>>> 31d3056 (Python Black Formatting (#492)) assert sig, minhash.hashes == sig1.minhash.hashes if sig.name == "short_protein": assert sig == sig2 @@ -1119,7 +1126,11 @@ def test_manysketch_prefix_duplicated_force(runtmp, capfd): dna_prefix = os.path.join( fa_path, "short" ) # need to avoid matching short-protein.fa +<<<<<<< HEAD prot_prefix = os.path.join(fa_path, "*protein*fa") +======= + prot_prefix = os.path.join(fa_path, "*protein") +>>>>>>> 31d3056 (Python Black Formatting (#492)) zip_exclude = os.path.join(fa_path, "*zip") # make prefix input file