From f7a3a12d5bb25b6f499af1f88aaaaad40bfe672d Mon Sep 17 00:00:00 2001
From: "C. Titus Brown" <titus@idyll.org>
Date: Tue, 17 Dec 2024 06:36:44 -0800
Subject: [PATCH 1/5] just the one commit, ma'am (#547)

---
 src/python/tests/sourmash_tst_utils.py | 45 +++++---------------------
 1 file changed, 8 insertions(+), 37 deletions(-)

diff --git a/src/python/tests/sourmash_tst_utils.py b/src/python/tests/sourmash_tst_utils.py
index 86c97c57..0bb30b14 100644
--- a/src/python/tests/sourmash_tst_utils.py
+++ b/src/python/tests/sourmash_tst_utils.py
@@ -7,8 +7,7 @@
 import collections
 import pprint
 
-import pkg_resources
-from pkg_resources import Requirement, resource_filename, ResolutionError
+import importlib.metadata
 import traceback
 from io import open  # pylint: disable=redefined-builtin
 from io import StringIO
@@ -61,46 +60,18 @@ def index_siglist(
     return db
 
 
-def scriptpath(scriptname="sourmash"):
-    """Return the path to the scripts, in both dev and install situations."""
-    # note - it doesn't matter what the scriptname is here, as long as
-    # it's some script present in this version of sourmash.
-
-    path = os.path.join(os.path.dirname(__file__), "../")
-    if os.path.exists(os.path.join(path, scriptname)):
-        return path
-
-    path = os.path.join(os.path.dirname(__file__), "../../EGG-INFO/")
-    if os.path.exists(os.path.join(path, scriptname)):
-        return path
-
-    for path in os.environ["PATH"].split(":"):
-        if os.path.exists(os.path.join(path, scriptname)):
-            return path
-
-
 def _runscript(scriptname):
     """Find & run a script with exec (i.e. not via os.system or subprocess)."""
     namespace = {"__name__": "__main__"}
     namespace["sys"] = globals()["sys"]
 
-    try:
-        pkg_resources.load_entry_point("sourmash", "console_scripts", scriptname)()
-        return 0
-    except pkg_resources.ResolutionError:
-        pass
-
-    path = scriptpath()
-
-    scriptfile = os.path.join(path, scriptname)
-    if os.path.isfile(scriptfile):
-        if os.path.isfile(scriptfile):
-            exec(  # pylint: disable=exec-used
-                compile(open(scriptfile).read(), scriptfile, "exec"), namespace
-            )
-            return 0
-
-    return -1
+    entry_points = importlib.metadata.entry_points(
+        group="console_scripts", name="sourmash"
+    )
+    assert len(entry_points) == 1
+    smash_cli = tuple(entry_points)[0].load()
+    smash_cli()
+    return 0
 
 
 ScriptResults = collections.namedtuple("ScriptResults", ["status", "out", "err"])

From 18f4e0733850e7b88438d1571dc40bbbfb448ea1 Mon Sep 17 00:00:00 2001
From: "C. Titus Brown" <titus@idyll.org>
Date: Wed, 18 Dec 2024 10:31:59 -0800
Subject: [PATCH 2/5] update to sourmash prospective v0.18.0

---
 Cargo.lock | 82 ++++++++++++++++++++++++++----------------------------
 Cargo.toml |  3 +-
 2 files changed, 42 insertions(+), 43 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index b1e7a9aa..d74fc93a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -206,7 +206,7 @@ dependencies = [
  "bitflags",
  "cexpr",
  "clang-sys",
- "itertools 0.12.1",
+ "itertools 0.11.0",
  "lazy_static",
  "lazycell",
  "proc-macro2",
@@ -808,10 +808,11 @@ dependencies = [
 
 [[package]]
 name = "js-sys"
-version = "0.3.72"
+version = "0.3.76"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9"
+checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7"
 dependencies = [
+ "once_cell",
  "wasm-bindgen",
 ]
 
@@ -972,13 +973,12 @@ checksum = "a2983372caf4480544083767bf2d27defafe32af49ab4df3a0b7fc90793a3664"
 
 [[package]]
 name = "nalgebra"
-version = "0.32.6"
+version = "0.33.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b5c17de023a86f59ed79891b2e5d5a94c705dbe904a5b5c9c952ea6221b03e4"
+checksum = "26aecdf64b707efd1310e3544d709c5c0ac61c13756046aaaba41be5c4f66a3b"
 dependencies = [
  "approx",
  "matrixmultiply",
- "nalgebra-macros",
  "num-complex",
  "num-rational",
  "num-traits",
@@ -988,17 +988,6 @@ dependencies = [
  "typenum",
 ]
 
-[[package]]
-name = "nalgebra-macros"
-version = "0.2.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "254a5372af8fc138e36684761d3c0cdb758a4410e938babcff1c860ce14ddbfc"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.87",
-]
-
 [[package]]
 name = "ndarray"
 version = "0.15.6"
@@ -1029,9 +1018,9 @@ dependencies = [
 
 [[package]]
 name = "needletail"
-version = "0.6.0"
+version = "0.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f29a3c5015d6985f33318d154fa0c41315eb2e7df29432c844c74a83434bfe21"
+checksum = "de3de09e373770238e3d30eb1a9f09f4754134d0ef354d0570bc1203d2517257"
 dependencies = [
  "buffer-redux",
  "bytecount",
@@ -1075,6 +1064,16 @@ version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"
 
+[[package]]
+name = "num-bigint"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
+dependencies = [
+ "num-integer",
+ "num-traits",
+]
+
 [[package]]
 name = "num-complex"
 version = "0.4.6"
@@ -1110,6 +1109,7 @@ version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
 dependencies = [
+ "num-bigint",
  "num-integer",
  "num-traits",
 ]
@@ -1585,9 +1585,9 @@ dependencies = [
 
 [[package]]
 name = "roaring"
-version = "0.10.6"
+version = "0.10.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f4b84ba6e838ceb47b41de5194a60244fac43d9fe03b71dbe8c5a201081d6d1"
+checksum = "41589aba99537475bf697f2118357cad1c31590c5a1b9f6d9fc4ad6d07503661"
 dependencies = [
  "bytemuck",
  "byteorder",
@@ -1735,9 +1735,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
 
 [[package]]
 name = "simba"
-version = "0.8.1"
+version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "061507c94fc6ab4ba1c9a0305018408e312e17c041eb63bef8aa726fa33aceae"
+checksum = "b3a386a501cd104797982c15ae17aafe8b9261315b5d07e3ec803f2ea26be0fa"
 dependencies = [
  "approx",
  "num-complex",
@@ -1772,9 +1772,8 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7"
 
 [[package]]
 name = "sourmash"
-version = "0.17.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54e30f752d984b1d8456024973f8d89772b4ba248f592b77b57d59ad27a232a0"
+version = "0.18.0"
+source = "git+https://github.com/sourmash-bio/sourmash.git?branch=latest#f4f5187e7dc9b9c177e099bbf7f3f42556867328"
 dependencies = [
  "az",
  "byteorder",
@@ -1794,7 +1793,7 @@ dependencies = [
  "md5",
  "memmap2",
  "murmurhash3",
- "needletail 0.6.0",
+ "needletail 0.6.1",
  "niffler",
  "nohash-hasher",
  "num-iter",
@@ -1856,9 +1855,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
 
 [[package]]
 name = "statrs"
-version = "0.17.1"
+version = "0.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f697a07e4606a0a25c044de247e583a330dbb1731d11bc7350b81f48ad567255"
+checksum = "2a3fe7c28c6512e766b0874335db33c94ad7b8f9054228ae1c2abd47ce7d335e"
 dependencies = [
  "approx",
  "nalgebra",
@@ -2117,9 +2116,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
 
 [[package]]
 name = "wasm-bindgen"
-version = "0.2.95"
+version = "0.2.99"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e"
+checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396"
 dependencies = [
  "cfg-if",
  "once_cell",
@@ -2128,13 +2127,12 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-backend"
-version = "0.2.95"
+version = "0.2.99"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358"
+checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79"
 dependencies = [
  "bumpalo",
  "log",
- "once_cell",
  "proc-macro2",
  "quote",
  "syn 2.0.87",
@@ -2143,9 +2141,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.95"
+version = "0.2.99"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56"
+checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe"
 dependencies = [
  "quote",
  "wasm-bindgen-macro-support",
@@ -2153,9 +2151,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.95"
+version = "0.2.99"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68"
+checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2166,15 +2164,15 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.95"
+version = "0.2.99"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d"
+checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6"
 
 [[package]]
 name = "web-sys"
-version = "0.3.72"
+version = "0.3.76"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112"
+checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc"
 dependencies = [
  "js-sys",
  "wasm-bindgen",
diff --git a/Cargo.toml b/Cargo.toml
index 4a845512..24234de3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,7 +12,8 @@ crate-type = ["cdylib"]
 pyo3 = { version = "0.23.2", features = ["extension-module", "anyhow"] }
 rayon = "1.10.0"
 serde = { version = "1.0.216", features = ["derive"] }
-sourmash = { version = "0.17.2", features = ["branchwater"] }
+#sourmash = { version = "0.17.2", features = ["branchwater"] }
+sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "latest", features = ["branchwater"] }
 serde_json = "1.0.133"
 niffler = "2.4.0"
 log = "0.4.22"

From b510d8124055e1736485fdc76c4e74e70482b45c Mon Sep 17 00:00:00 2001
From: "C. Titus Brown" <titus@idyll.org>
Date: Wed, 18 Dec 2024 11:17:56 -0800
Subject: [PATCH 3/5] remove tests for bad zip files :sweat_smile:

---
 src/python/tests/test_fastgather.py      | 34 ---------------
 src/python/tests/test_fastmultigather.py | 55 ------------------------
 src/python/tests/test_index.py           | 21 ---------
 src/python/tests/test_manysearch.py      | 27 ------------
 src/python/tests/test_multisearch.py     | 27 ------------
 src/python/tests/test_pairwise.py        | 23 ----------
 6 files changed, 187 deletions(-)

diff --git a/src/python/tests/test_fastgather.py b/src/python/tests/test_fastgather.py
index e3ab4255..2fb1ed78 100644
--- a/src/python/tests/test_fastgather.py
+++ b/src/python/tests/test_fastgather.py
@@ -427,40 +427,6 @@ def test_bad_against_2(runtmp, capfd):
     )
 
 
-def test_bad_against_3(runtmp, capfd):
-    # test with a bad against (a .sig.gz file renamed as zip file)
-    query = get_test_data("SRR606249.sig.gz")
-
-    sig2 = get_test_data("2.fa.sig.gz")
-    against_zip = runtmp.output("against.zip")
-    # cp sig2 into against_zip
-    with open(against_zip, "wb") as fp:
-        with open(sig2, "rb") as fp2:
-            fp.write(fp2.read())
-
-    g_output = runtmp.output("gather.csv")
-    p_output = runtmp.output("prefetch.csv")
-
-    with pytest.raises(utils.SourmashCommandFailed):
-        runtmp.sourmash(
-            "scripts",
-            "fastgather",
-            query,
-            against_zip,
-            "-o",
-            g_output,
-            "--output-prefetch",
-            p_output,
-            "-s",
-            "100000",
-        )
-
-    captured = capfd.readouterr()
-    print(captured.err)
-
-    assert "InvalidArchive" in captured.err
-
-
 @pytest.mark.xfail(reason="should work, bug")
 def test_against_multisigfile(runtmp, zip_against):
     # test against a sigfile that contains multiple sketches
diff --git a/src/python/tests/test_fastmultigather.py b/src/python/tests/test_fastmultigather.py
index 9d9c17e7..d47f62ad 100644
--- a/src/python/tests/test_fastmultigather.py
+++ b/src/python/tests/test_fastmultigather.py
@@ -561,34 +561,6 @@ def test_sig_query(runtmp, capfd, indexed):
         }.issubset(keys)
 
 
-def test_bad_query(runtmp, capfd, indexed):
-    # test with a bad query (a .sig.gz file renamed as zip file)
-    against_list = runtmp.output("against.txt")
-
-    sig2 = get_test_data("2.fa.sig.gz")
-    sig47 = get_test_data("47.fa.sig.gz")
-    sig63 = get_test_data("63.fa.sig.gz")
-
-    query_zip = runtmp.output("query.zip")
-    # cp sig2 into query_zip
-    with open(query_zip, "wb") as fp:
-        with open(sig2, "rb") as fp2:
-            fp.write(fp2.read())
-
-    make_file_list(against_list, [sig2, sig47, sig63])
-
-    if indexed:
-        against_list = index_siglist(runtmp, against_list, runtmp.output("db"))
-
-    with pytest.raises(utils.SourmashCommandFailed):
-        runtmp.sourmash("scripts", "fastmultigather", query_zip, against_list)
-
-    captured = capfd.readouterr()
-    print(captured.err)
-
-    assert "InvalidArchive" in captured.err
-
-
 def test_missing_query(runtmp, capfd, indexed):
     # test missing query
     query_list = runtmp.output("query.txt")
@@ -736,33 +708,6 @@ def test_bad_against(runtmp, capfd):
     )
 
 
-def test_bad_against_2(runtmp, capfd, zip_query):
-    # test with a bad against (a .sig.gz file renamed as zip file)
-    query = get_test_data("SRR606249.sig.gz")
-    query_list = runtmp.output("query.txt")
-    make_file_list(query_list, [query])
-
-    sig2 = get_test_data("2.fa.sig.gz")
-    against_zip = runtmp.output("against.zip")
-    # cp sig2 into query_zip
-    with open(against_zip, "wb") as fp:
-        with open(sig2, "rb") as fp2:
-            fp.write(fp2.read())
-
-    if zip_query:
-        query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip"))
-
-    with pytest.raises(utils.SourmashCommandFailed):
-        runtmp.sourmash(
-            "scripts", "fastmultigather", query_list, against_zip, "-s", "100000"
-        )
-
-    captured = capfd.readouterr()
-    print(captured.err)
-
-    assert "InvalidArchive" in captured.err
-
-
 def test_empty_against(runtmp, capfd):
     # test bad 'against' file - in this case, an empty one
     query = get_test_data("SRR606249.sig.gz")
diff --git a/src/python/tests/test_index.py b/src/python/tests/test_index.py
index 72d40bee..4d7a5f04 100644
--- a/src/python/tests/test_index.py
+++ b/src/python/tests/test_index.py
@@ -429,27 +429,6 @@ def test_index_zipfile_multiparam(runtmp, capfd, toggle_internal_storage):
         runtmp.sourmash("scripts", "index", zipf, "-o", output, toggle_internal_storage)
 
 
-def test_index_zipfile_bad(runtmp, capfd):
-    # test with a bad input zipfile (a .sig.gz file renamed as zip file)
-    sig2 = get_test_data("2.fa.sig.gz")
-
-    query_zip = runtmp.output("query.zip")
-    # cp sig2 into query_zip
-    with open(query_zip, "wb") as fp:
-        with open(sig2, "rb") as fp2:
-            fp.write(fp2.read())
-
-    output = runtmp.output("out.csv")
-
-    with pytest.raises(utils.SourmashCommandFailed):
-        runtmp.sourmash("scripts", "index", query_zip, "-o", output)
-
-    captured = capfd.readouterr()
-    print(captured.err)
-
-    assert "Couldn't find End Of Central Directory Record" in captured.err
-
-
 def test_index_check(runtmp, toggle_internal_storage):
     # test check index
     siglist = runtmp.output("db-sigs.txt")
diff --git a/src/python/tests/test_manysearch.py b/src/python/tests/test_manysearch.py
index 6275b0cf..0e84fb00 100644
--- a/src/python/tests/test_manysearch.py
+++ b/src/python/tests/test_manysearch.py
@@ -556,33 +556,6 @@ def test_bad_query_2(runtmp, capfd, indexed):
     )
 
 
-def test_bad_query_3(runtmp, capfd):
-    # test with a bad query (a .sig.gz file renamed as zip file)
-    against_list = runtmp.output("against.txt")
-
-    sig2 = get_test_data("2.fa.sig.gz")
-    sig47 = get_test_data("47.fa.sig.gz")
-    sig63 = get_test_data("63.fa.sig.gz")
-
-    query_zip = runtmp.output("query.zip")
-    # cp sig2 into query_zip
-    with open(query_zip, "wb") as fp:
-        with open(sig2, "rb") as fp2:
-            fp.write(fp2.read())
-
-    make_file_list(against_list, [sig2, sig47, sig63])
-
-    output = runtmp.output("out.csv")
-
-    with pytest.raises(utils.SourmashCommandFailed):
-        runtmp.sourmash("scripts", "multisearch", query_zip, against_list, "-o", output)
-
-    captured = capfd.readouterr()
-    print(captured.err)
-
-    assert "InvalidArchive" in captured.err
-
-
 def test_missing_against(runtmp, capfd, indexed):
     # test with a missing against list
     query_list = runtmp.output("query.txt")
diff --git a/src/python/tests/test_multisearch.py b/src/python/tests/test_multisearch.py
index dfc65ee2..3be64fed 100644
--- a/src/python/tests/test_multisearch.py
+++ b/src/python/tests/test_multisearch.py
@@ -628,33 +628,6 @@ def test_bad_query(runtmp, capfd):
     )
 
 
-def test_bad_query_3(runtmp, capfd):
-    # test with a bad query (a .sig.gz file renamed as zip file)
-    against_list = runtmp.output("against.txt")
-
-    sig2 = get_test_data("2.fa.sig.gz")
-    sig47 = get_test_data("47.fa.sig.gz")
-    sig63 = get_test_data("63.fa.sig.gz")
-
-    query_zip = runtmp.output("query.zip")
-    # cp sig2 into query_zip
-    with open(query_zip, "wb") as fp:
-        with open(sig2, "rb") as fp2:
-            fp.write(fp2.read())
-
-    make_file_list(against_list, [sig2, sig47, sig63])
-
-    output = runtmp.output("out.csv")
-
-    with pytest.raises(utils.SourmashCommandFailed):
-        runtmp.sourmash("scripts", "multisearch", query_zip, against_list, "-o", output)
-
-    captured = capfd.readouterr()
-    print(captured.err)
-
-    assert "InvalidArchive" in captured.err
-
-
 def test_missing_against(runtmp, capfd, zip_db):
     # test with a missing against list
     query_list = runtmp.output("query.txt")
diff --git a/src/python/tests/test_pairwise.py b/src/python/tests/test_pairwise.py
index 1a940043..c4021239 100644
--- a/src/python/tests/test_pairwise.py
+++ b/src/python/tests/test_pairwise.py
@@ -230,29 +230,6 @@ def test_bad_query(runtmp, capfd):
     )
 
 
-def test_bad_query_2(runtmp, capfd):
-    # test with a bad query (a .sig.gz file renamed as zip file)
-    sig2 = get_test_data("2.fa.sig.gz")
-    sig47 = get_test_data("47.fa.sig.gz")
-    sig63 = get_test_data("63.fa.sig.gz")
-
-    query_zip = runtmp.output("query.zip")
-    # cp sig2 into query_zip
-    with open(query_zip, "wb") as fp:
-        with open(sig2, "rb") as fp2:
-            fp.write(fp2.read())
-
-    output = runtmp.output("out.csv")
-
-    with pytest.raises(utils.SourmashCommandFailed):
-        runtmp.sourmash("scripts", "pairwise", query_zip, "-o", output)
-
-    captured = capfd.readouterr()
-    print(captured.err)
-
-    assert "InvalidArchive" in captured.err
-
-
 def test_missing_query(runtmp, capfd, zip_db):
     # test with a missing query list
     query_list = runtmp.output("query.txt")

From a950309e521f4efcb4dfe3320fc5b252148fa6ed Mon Sep 17 00:00:00 2001
From: "C. Titus Brown" <titus@idyll.org>
Date: Thu, 19 Dec 2024 14:47:44 -0800
Subject: [PATCH 4/5] MRG: add `--output-all-comparisons` to manysearch,
 multisearch, and pairwise (#544)

* add --output-all-comparisons to manysearch

* add doc, allow -A

* update docs, add -A to all the things

* cargo fmt

* clippy

* check for directories, fail appropriately

* run black

* test -A

* ran black again

* fix test

* fiz

* fiz
---
 Cargo.lock                                    |   2 +-
 doc/README.md                                 |  41 ++--
 src/lib.rs                                    |  14 +-
 src/manysearch.rs                             | 178 ++++++++++--------
 src/manysearch_rocksdb.rs                     |  10 +-
 src/multisearch.rs                            |   3 +-
 src/pairwise.rs                               |   8 +-
 .../sourmash_plugin_branchwater/__init__.py   |  22 +++
 src/python/tests/test_manysearch.py           | 110 +++++++++++
 src/python/tests/test_multisearch.py          |  74 ++++++++
 src/python/tests/test_pairwise.py             |  56 ++++++
 src/utils/mod.rs                              |  16 +-
 12 files changed, 428 insertions(+), 106 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index b1e7a9aa..933b5a90 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -206,7 +206,7 @@ dependencies = [
  "bitflags",
  "cexpr",
  "clang-sys",
- "itertools 0.12.1",
+ "itertools 0.11.0",
  "lazy_static",
  "lazycell",
  "proc-macro2",
diff --git a/doc/README.md b/doc/README.md
index 6da3b286..744348fb 100644
--- a/doc/README.md
+++ b/doc/README.md
@@ -29,13 +29,11 @@ be processed differently.  The plugin commands are also a bit less
 user friendly, because (for now) we're more focused on speed than
 polish and user experience.
 
-**Note:** As of v0.9.5, the outputs of `fastgather` and `fastmultigather` almost completely match the output of `sourmash gather`; see below for details.
-
 ## Input file formats
 
 sourmash supports a variety of different storage formats for sketches (see [sourmash docs](https://sourmash.readthedocs.io/en/latest/command-line.html#choosing-signature-output-formats)), and the branchwater plugin works with some (but not all) of them. Branchwater _also_ supports an additional database type, a RocksDB-based inverted index, that is not (yet) supported natively by sourmash (through v4.8.11).
 
-**As of v0.9.8, we recommend using zip files or standalone manifest CSVs pointing to zip files whenever you need to provide multiple sketches.**
+**We recommend using zip files or standalone manifest CSVs pointing to zip files whenever you need to provide multiple sketches.**
 
 | command | command input | database format |
 | -------- | -------- | -------- |
@@ -58,8 +56,8 @@ When working with large collections of small sketches such as genomes, we sugges
 * in particular, _single_ sketches can be loaded on demand, supporting lower memory requirements for certain kinds of searches.
 
 For all these reasons, zip files are the most efficient and effective
-basic storage type for sketches in sourmash, and as of the branchwater
-plugin v0.9.0, they are fully supported!
+basic storage type for sketches in sourmash, and the branchwater
+plugin fully supports them!
 
 You can create zipfiles with sourmash like so:
 ```
@@ -152,7 +150,7 @@ at the start in order to generate a manifest. To avoid memory issues,
 the signatures are not kept in memory, but instead re-loaded as
 described below for each command (see: Notes on concurrency and
 efficiency). This makes using pathlists less efficient than `zip`
-files (as of v0.9.0) or manifests (as of v0.9.8).
+files.
 
 ## Running the commands
 
@@ -304,7 +302,7 @@ version of `sourmash gather`.
 sourmash scripts fastgather query.sig.gz database.zip -o results.csv --cores 4
 ```
 
-As of v0.9.5, `fastgather` outputs the same columns as `sourmash gather`, with only a few exception
+`fastgather` outputs the same columns as `sourmash gather`, with only a few exception
 * `match_name` is output instead of `name`;
 * `match_md5` is output instead of `md5`;
 * `match_filename` is output instead of `filename`, and the value is different;
@@ -392,6 +390,11 @@ To report _any_ overlap between two sketches, set the threshold to 0.
 (This will produce many, many results when searching a collection of
 metagenomes!)
 
+Using `-A/--output-all-comparisons` will ignore the threshold parameter
+and output all comparisons done. Against a RocksDB database, only matches
+with some overlap will be reported; with collections of sketches, all
+pairs will be reported.
+
 By default, `manysearch` will display the contents of the CSV file in a
 human-readable format. This can be disabled with `-N/--no-pretty-print`
 when executing large searches.
@@ -452,14 +455,14 @@ pathlist format, and specify the desired output directory; we suggest
 using the `.rocksdb` extension for RocksDB databases, e.g. `-o
 gtdb-rs214-k31.rocksdb`.
 
-By default, as of v0.9.7, `index` will store a copy of the sketches
+By default, `index` will store a copy of the sketches
 along with the inverted index.  This will substantially increase the
 disk space required for large databases.  You can provide an optional
 `--no-internal-storage` to `index` to store them externally, which
 reduces the disk space needed for the index.  Read below for technical
 details!
 
-As of v0.9.8, `index` can take any of the supported input types, but
+`index` can take any of the supported input types, but
 unless you are using a zip file or a pathlist of JSON files, it may
 need to load all the sketches into memory before indexing
 them. Moreover, you can only use external storage with a zip file. We
@@ -470,9 +473,6 @@ the sketches are being loaded into memory.
 
 #### Internal vs external storage of sketches in a RocksDB index
 
-(The below applies to v0.9.7 and later of the plugin; for v0.9.6 and
-before, only external storage was implemented.)
-
 RocksDB indexes support containment queries (a la the
 [branchwater application](https://github.com/sourmash-bio/branchwater)),
 as well as `gather`-style mixture decomposition (see
@@ -489,7 +489,7 @@ the original source sketches used to construct the database, wherever
 they reside on your disk.
 
 The sketches *are not used* by `manysearch`, but *are used* by
-`fastmultigather`: with v0.9.6 and later, you'll get an error if you
+`fastmultigather`: you'll get an error if you
 run `fastmultigather` against a RocksDB index where the sketches
 cannot be loaded.
 
@@ -521,6 +521,21 @@ in downstream software packages (this plugin, and
 [the branchwater application code](https://github.com/sourmash-bio/branchwater)).
 The above documentation applies to sourmash core v0.15.0.
 
+## Notes on versioning and semantic versioning guarantees
+
+Unlike sourmash,
+[which provides guarantees that command-line options and outputs will not change within minor versions](https://sourmash.readthedocs.io/en/latest/support.html#versioning-and-stability-of-features-and-apis),
+we make no guarantees of stability within the branchwater plugin. This
+is because the branchwater plugin is intended to move fast and
+occasionally break things.
+
+Eventually we expect to provide all of the branchwater plugin's functionality within the sourmash package, at which time the sourmash guarantees will apply!
+
+However, we do not expect command line options and output file formats
+to change quickly.
+
+We will also endeavor to avoid changing column names in CSV output, although, we may change the _order_ of column names on occasion. Please use the column headers (column names) to select specific columns.
+
 ## Notes on concurrency and efficiency
 
 Each command does things somewhat differently, with implications for
diff --git a/src/lib.rs b/src/lib.rs
index afa5b857..8226a633 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -28,7 +28,7 @@ mod singlesketch;
 use camino::Utf8PathBuf as PathBuf;
 
 #[pyfunction]
-#[pyo3(signature = (querylist_path, siglist_path, threshold, ksize, scaled, moltype, output_path=None, ignore_abundance=false))]
+#[pyo3(signature = (querylist_path, siglist_path, threshold, ksize, scaled, moltype, output_path=None, ignore_abundance=false, output_all_comparisons=false))]
 #[allow(clippy::too_many_arguments)]
 fn do_manysearch(
     querylist_path: String,
@@ -39,6 +39,7 @@ fn do_manysearch(
     moltype: String,
     output_path: Option<String>,
     ignore_abundance: Option<bool>,
+    output_all_comparisons: Option<bool>,
 ) -> anyhow::Result<u8> {
     let againstfile_path: PathBuf = siglist_path.clone().into();
     let selection = build_selection(ksize, scaled, &moltype);
@@ -46,6 +47,7 @@ fn do_manysearch(
     let allow_failed_sigpaths = true;
 
     let ignore_abundance = ignore_abundance.unwrap_or(false);
+    let output_all_comparisons = output_all_comparisons.unwrap_or(false);
 
     // if siglist_path is revindex, run rocksdb manysearch; otherwise run manysearch
     if is_revindex_database(&againstfile_path) {
@@ -57,6 +59,7 @@ fn do_manysearch(
             threshold,
             output_path,
             allow_failed_sigpaths,
+            output_all_comparisons,
         ) {
             Ok(_) => Ok(0),
             Err(e) => {
@@ -73,6 +76,7 @@ fn do_manysearch(
             output_path,
             allow_failed_sigpaths,
             ignore_abundance,
+            output_all_comparisons,
         ) {
             Ok(_) => Ok(0),
             Err(e) => {
@@ -232,7 +236,7 @@ fn do_check(index: String, quick: bool) -> anyhow::Result<u8> {
 }
 
 #[pyfunction]
-#[pyo3(signature = (querylist_path, siglist_path, threshold, ksize, scaled, moltype, estimate_ani, estimate_prob_overlap, output_path=None))]
+#[pyo3(signature = (querylist_path, siglist_path, threshold, ksize, scaled, moltype, estimate_ani, estimate_prob_overlap, output_all_comparisons, output_path=None))]
 #[allow(clippy::too_many_arguments)]
 fn do_multisearch(
     querylist_path: String,
@@ -243,6 +247,7 @@ fn do_multisearch(
     moltype: String,
     estimate_ani: bool,
     estimate_prob_overlap: bool,
+    output_all_comparisons: bool,
     output_path: Option<String>,
 ) -> anyhow::Result<u8> {
     let _ = env_logger::try_init();
@@ -258,6 +263,7 @@ fn do_multisearch(
         allow_failed_sigpaths,
         estimate_ani,
         estimate_prob_overlap,
+        output_all_comparisons,
         output_path,
     ) {
         Ok(_) => Ok(0),
@@ -270,7 +276,7 @@ fn do_multisearch(
 
 #[pyfunction]
 #[allow(clippy::too_many_arguments)]
-#[pyo3(signature = (siglist_path, threshold, ksize, scaled, moltype, estimate_ani, write_all, output_path=None))]
+#[pyo3(signature = (siglist_path, threshold, ksize, scaled, moltype, estimate_ani, write_all, output_all_comparisons, output_path=None))]
 fn do_pairwise(
     siglist_path: String,
     threshold: f64,
@@ -279,6 +285,7 @@ fn do_pairwise(
     moltype: String,
     estimate_ani: bool,
     write_all: bool,
+    output_all_comparisons: bool,
     output_path: Option<String>,
 ) -> anyhow::Result<u8> {
     let selection = build_selection(ksize, scaled, &moltype);
@@ -290,6 +297,7 @@ fn do_pairwise(
         allow_failed_sigpaths,
         estimate_ani,
         write_all,
+        output_all_comparisons,
         output_path,
     ) {
         Ok(_) => Ok(0),
diff --git a/src/manysearch.rs b/src/manysearch.rs
index 968f1548..d5d629ca 100644
--- a/src/manysearch.rs
+++ b/src/manysearch.rs
@@ -9,7 +9,9 @@ use stats::{median, stddev};
 use std::sync::atomic;
 use std::sync::atomic::AtomicUsize;
 
-use crate::utils::{csvwriter_thread, load_collection, ReportType, SearchResult};
+use crate::utils::{
+    csvwriter_thread, load_collection, ManySearchResult, ReportType, SmallSignature,
+};
 use sourmash::ani_utils::ani_from_containment;
 use sourmash::errors::SourmashError;
 use sourmash::selection::Selection;
@@ -25,6 +27,7 @@ pub fn manysearch(
     output: Option<String>,
     allow_failed_sigpaths: bool,
     ignore_abundance: bool,
+    output_all_comparisons: bool,
 ) -> Result<()> {
     // Load query collection
     let query_collection = load_collection(
@@ -61,7 +64,8 @@ pub fn manysearch(
     )?;
 
     // set up a multi-producer, single-consumer channel.
-    let (send, recv) = std::sync::mpsc::sync_channel::<SearchResult>(rayon::current_num_threads());
+    let (send, recv) =
+        std::sync::mpsc::sync_channel::<ManySearchResult>(rayon::current_num_threads());
 
     // & spawn a thread that is dedicated to printing to a buffered output
     let thrd = csvwriter_thread(recv, output);
@@ -95,80 +99,18 @@ pub fn manysearch(
                         <SigStore as TryInto<KmerMinHash>>::try_into(against_sig)
                     {
                         for query in query_sketchlist.iter() {
-                            // be paranoid and confirm scaled match.
-                            if query.minhash.scaled() != common_scaled {
-                                panic!("different query scaled");
-                            }
-                            if against_mh.scaled() != common_scaled {
-                                panic!("different against scaled");
-                            }
-
-                            let overlap = query
-                                .minhash
-                                .count_common(&against_mh, false)
-                                .expect("incompatible sketches")
-                                as f64;
-
-                            let query_size = query.minhash.size() as f64;
-                            let containment_query_in_target = overlap / query_size;
-                            // only calculate results if we have shared hashes
-                            if containment_query_in_target > threshold {
-                                let target_size = against_mh.size() as f64;
-                                let containment_target_in_query = overlap / target_size;
-
-                                let max_containment =
-                                    containment_query_in_target.max(containment_target_in_query);
-                                let jaccard = overlap / (target_size + query_size - overlap);
-
-                                let qani = ani_from_containment(
-                                    containment_query_in_target,
-                                    against_mh.ksize() as f64,
-                                );
-                                let mani = ani_from_containment(
-                                    containment_target_in_query,
-                                    against_mh.ksize() as f64,
-                                );
-                                let query_containment_ani = Some(qani);
-                                let match_containment_ani = Some(mani);
-                                let average_containment_ani = Some((qani + mani) / 2.);
-                                let max_containment_ani = Some(f64::max(qani, mani));
-
-                                let calc_abund_stats =
-                                    against_mh.track_abundance() && !ignore_abundance;
-                                let (
-                                    total_weighted_hashes,
-                                    n_weighted_found,
-                                    average_abund,
-                                    median_abund,
-                                    std_abund,
-                                ) = if calc_abund_stats {
-                                    inflate_abundances(&query.minhash, &against_mh).ok()?
-                                } else {
-                                    (None, None, None, None, None)
-                                };
-
-                                results.push(SearchResult {
-                                    query_name: query.name.clone(),
-                                    query_md5: query.md5sum.clone(),
-                                    match_name: against_name.clone(),
-                                    containment: containment_query_in_target,
-                                    intersect_hashes: overlap as u64,
-                                    ksize: query.minhash.ksize() as u16,
-                                    scaled: query.minhash.scaled(),
-                                    moltype: query.minhash.hash_function().to_string(),
-                                    match_md5: Some(against_md5.clone()),
-                                    jaccard: Some(jaccard),
-                                    max_containment: Some(max_containment),
-                                    average_abund,
-                                    median_abund,
-                                    std_abund,
-                                    query_containment_ani,
-                                    match_containment_ani,
-                                    average_containment_ani,
-                                    max_containment_ani,
-                                    n_weighted_found,
-                                    total_weighted_hashes,
-                                });
+                            let sr = calculate_manysearch_result(
+                                query,
+                                &against_mh,
+                                &against_name,
+                                &against_md5,
+                                threshold,
+                                common_scaled,
+                                ignore_abundance,
+                                output_all_comparisons,
+                            );
+                            if let Some(sr) = sr {
+                                results.push(sr);
                             }
                         }
                     } else {
@@ -226,6 +168,9 @@ pub fn manysearch(
     Ok(())
 }
 
+// inflate_abundances: "borrow" the abundances from 'against' onto the
+// intersection with 'query'.
+
 fn inflate_abundances(
     query: &KmerMinHash,
     against: &KmerMinHash,
@@ -241,10 +186,9 @@ fn inflate_abundances(
 > {
     let abunds: Vec<u64>;
     let sum_weighted: u64;
-    let sum_all_abunds: u64;
+    let sum_all_abunds: u64 = against.sum_abunds();
 
     (abunds, sum_weighted) = query.inflated_abundances(against)?;
-    sum_all_abunds = against.sum_abunds();
 
     let average_abund = sum_weighted as f64 / abunds.len() as f64;
     let median_abund = median(abunds.iter().cloned()).expect("error");
@@ -258,3 +202,81 @@ fn inflate_abundances(
         Some(std_abund),
     ))
 }
+
+// calculate_manysearch_result: calculate all the things
+
+fn calculate_manysearch_result(
+    query: &SmallSignature,
+    against_mh: &KmerMinHash,
+    against_name: &str,
+    against_md5: &str,
+    threshold: f64,
+    common_scaled: u32,
+    ignore_abundance: bool,
+    output_all_comparisons: bool,
+) -> Option<ManySearchResult> {
+    // be paranoid and confirm scaled match.
+    if query.minhash.scaled() != common_scaled {
+        panic!("different query scaled");
+    }
+    if against_mh.scaled() != common_scaled {
+        panic!("different against scaled");
+    }
+
+    let overlap = query
+        .minhash
+        .count_common(against_mh, false)
+        .expect("incompatible sketches") as f64;
+
+    let query_size = query.minhash.size() as f64;
+    let containment_query_in_target = overlap / query_size;
+
+    // only calculate results if we have shared hashes
+    if containment_query_in_target > threshold || output_all_comparisons {
+        let target_size = against_mh.size() as f64;
+        let containment_target_in_query = overlap / target_size;
+
+        let max_containment = containment_query_in_target.max(containment_target_in_query);
+        let jaccard = overlap / (target_size + query_size - overlap);
+
+        let qani = ani_from_containment(containment_query_in_target, against_mh.ksize() as f64);
+        let mani = ani_from_containment(containment_target_in_query, against_mh.ksize() as f64);
+        let query_containment_ani = Some(qani);
+        let match_containment_ani = Some(mani);
+        let average_containment_ani = Some((qani + mani) / 2.);
+        let max_containment_ani = Some(f64::max(qani, mani));
+
+        let calc_abund_stats = against_mh.track_abundance() && !ignore_abundance;
+        let (total_weighted_hashes, n_weighted_found, average_abund, median_abund, std_abund) =
+            if calc_abund_stats {
+                inflate_abundances(&query.minhash, against_mh).ok()?
+            } else {
+                (None, None, None, None, None)
+            };
+
+        let sr = ManySearchResult {
+            query_name: query.name.clone(),
+            query_md5: query.md5sum.clone(),
+            match_name: against_name.to_string(),
+            containment: containment_query_in_target,
+            intersect_hashes: overlap as u64,
+            ksize: query.minhash.ksize() as u16,
+            scaled: query.minhash.scaled(),
+            moltype: query.minhash.hash_function().to_string(),
+            match_md5: Some(against_md5.to_string()),
+            jaccard: Some(jaccard),
+            max_containment: Some(max_containment),
+            average_abund,
+            median_abund,
+            std_abund,
+            query_containment_ani,
+            match_containment_ani,
+            average_containment_ani,
+            max_containment_ani,
+            n_weighted_found,
+            total_weighted_hashes,
+        };
+        return Some(sr);
+    }
+    None
+}
diff --git a/src/manysearch_rocksdb.rs b/src/manysearch_rocksdb.rs
index 30f75dd5..85fd8542 100644
--- a/src/manysearch_rocksdb.rs
+++ b/src/manysearch_rocksdb.rs
@@ -14,7 +14,7 @@ use sourmash::sketch::minhash::KmerMinHash;
 use sourmash::storage::SigStore;
 
 use crate::utils::{
-    csvwriter_thread, is_revindex_database, load_collection, ReportType, SearchResult,
+    csvwriter_thread, is_revindex_database, load_collection, ManySearchResult, ReportType,
 };
 
 pub fn manysearch_rocksdb(
@@ -24,6 +24,7 @@ pub fn manysearch_rocksdb(
     minimum_containment: f64,
     output: Option<String>,
     allow_failed_sigpaths: bool,
+    output_all_comparisons: bool,
 ) -> Result<(), Box<dyn std::error::Error>> {
     if !is_revindex_database(&index) {
         bail!("'{}' is not a valid RevIndex database", index);
@@ -68,7 +69,8 @@ pub fn manysearch_rocksdb(
     )?;
 
     // set up a multi-producer, single-consumer channel.
-    let (send, recv) = std::sync::mpsc::sync_channel::<SearchResult>(rayon::current_num_threads());
+    let (send, recv) =
+        std::sync::mpsc::sync_channel::<ManySearchResult>(rayon::current_num_threads());
 
     // & spawn a thread that is dedicated to printing to a buffered output
     let thrd = csvwriter_thread(recv, output);
@@ -107,13 +109,13 @@ pub fn manysearch_rocksdb(
                         // filter the matches for containment
                         for (path, overlap) in matches {
                             let containment = overlap as f64 / query_size as f64;
-                            if containment >= minimum_containment {
+                            if containment >= minimum_containment || output_all_comparisons {
                                 let query_containment_ani = Some(ani_from_containment(
                                     containment,
                                     query_mh.ksize() as f64,
                                 ));
 
-                                results.push(SearchResult {
+                                results.push(ManySearchResult {
                                     query_name: query_name.clone(),
                                     query_md5: query_md5.clone(),
                                     match_name: path.clone(),
diff --git a/src/multisearch.rs b/src/multisearch.rs
index 0aabba36..fb030a2b 100644
--- a/src/multisearch.rs
+++ b/src/multisearch.rs
@@ -143,6 +143,7 @@ pub fn multisearch(
     allow_failed_sigpaths: bool,
     estimate_ani: bool,
     estimate_prob_overlap: bool,
+    output_all_comparisons: bool,
     output: Option<String>,
 ) -> Result<(), Box<dyn std::error::Error>> {
     // Load all queries into memory at once.
@@ -249,7 +250,7 @@ pub fn multisearch(
 
                 let containment_query_in_target = overlap / query_size;
 
-                if containment_query_in_target > threshold {
+                if containment_query_in_target > threshold || output_all_comparisons {
                     let containment_target_in_query = overlap / target_size;
                     let max_containment =
                         containment_query_in_target.max(containment_target_in_query);
diff --git a/src/pairwise.rs b/src/pairwise.rs
index b9e1a3b3..f5d2d362 100644
--- a/src/pairwise.rs
+++ b/src/pairwise.rs
@@ -20,6 +20,7 @@ pub fn pairwise(
     allow_failed_sigpaths: bool,
     estimate_ani: bool,
     write_all: bool,
+    output_all_comparisons: bool,
     output: Option<String>,
 ) -> Result<(), Box<dyn std::error::Error>> {
     // Load all sigs into memory at once.
@@ -86,7 +87,10 @@ pub fn pairwise(
             let containment_adjusted_log10 = None;
             let tf_idf_score = None;
 
-            if containment_q1_in_q2 > threshold || containment_q2_in_q1 > threshold {
+            if containment_q1_in_q2 > threshold
+                || containment_q2_in_q1 > threshold
+                || output_all_comparisons
+            {
                 let max_containment = containment_q1_in_q2.max(containment_q2_in_q1);
                 let jaccard = overlap / (query1_size + query2_size - overlap);
                 let mut query_containment_ani = None;
@@ -133,7 +137,7 @@ pub fn pairwise(
                 eprintln!("Processed {} comparisons", i);
             }
         }
-        if write_all {
+        if write_all || output_all_comparisons {
             let mut query_containment_ani = None;
             let mut match_containment_ani = None;
             let mut average_containment_ani = None;
diff --git a/src/python/sourmash_plugin_branchwater/__init__.py b/src/python/sourmash_plugin_branchwater/__init__.py
index 3de354aa..ba2f5798 100755
--- a/src/python/sourmash_plugin_branchwater/__init__.py
+++ b/src/python/sourmash_plugin_branchwater/__init__.py
@@ -107,6 +107,12 @@ def __init__(self, p):
             action="store_true",
             help="do not do expensive abundance calculations",
         )
+        p.add_argument(
+            "-A",
+            "--output-all-comparisons",
+            action="store_true",
+            help="ignore threshold and output all comparisons; against a RocksDB database, this will only output comparisons with some overlap",
+        )
 
     def main(self, args):
         print_version()
@@ -129,6 +135,7 @@ def main(self, args):
             args.moltype,
             args.output,
             args.ignore_abundance,
+            args.output_all_comparisons,
         )
         if status == 0:
             notify(f"...manysearch is done! results in '{args.output}'")
@@ -462,6 +469,12 @@ def __init__(self, p):
             action="store_true",
             help="estimate probability of overlap for significance ranking of search results, of the specific query and match, given all queries and possible matches",
         )
+        p.add_argument(
+            "-A",
+            "--output-all-comparisons",
+            action="store_true",
+            help="ignore threshold and output all comparisons",
+        )
 
     def main(self, args):
         print_version()
@@ -488,6 +501,7 @@ def main(self, args):
             args.moltype,
             args.ani,
             args.prob_significant_overlap,
+            args.output_all_comparisons,
             args.output,
         )
         if status == 0:
@@ -545,9 +559,16 @@ def __init__(self, p):
         )
         p.add_argument(
             "--write-all",
+            "--write-self-comparisons",
             action="store_true",
             help="write self comparisons for all sketches",
         )
+        p.add_argument(
+            "-A",
+            "--output-all-comparisons",
+            action="store_true",
+            help="ignore threshold and output all comparisons",
+        )
 
     def main(self, args):
         print_version()
@@ -570,6 +591,7 @@ def main(self, args):
             args.moltype,
             args.ani,
             args.write_all,
+            args.output_all_comparisons,
             args.output,
         )
         if status == 0:
diff --git a/src/python/tests/test_manysearch.py b/src/python/tests/test_manysearch.py
index 6275b0cf..f978272f 100644
--- a/src/python/tests/test_manysearch.py
+++ b/src/python/tests/test_manysearch.py
@@ -113,6 +113,116 @@ def test_simple(runtmp, zip_query, zip_against):
                 assert max_ani == 0.9772
 
 
+def test_simple_output_all(runtmp, zip_query, zip_against):
+    # test basic execution!
+    query_list = runtmp.output("query.txt")
+    against_list = runtmp.output("against.txt")
+
+    sig2 = get_test_data("2.fa.sig.gz")
+    sig47 = get_test_data("47.fa.sig.gz")
+    sig63 = get_test_data("63.fa.sig.gz")
+
+    make_file_list(query_list, [sig2, sig47, sig63])
+    make_file_list(against_list, [sig2, sig47, sig63])
+
+    output = runtmp.output("out.csv")
+
+    if zip_query:
+        query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip"))
+    if zip_against:
+        against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip"))
+
+    runtmp.sourmash(
+        "scripts",
+        "manysearch",
+        query_list,
+        against_list,
+        "-o",
+        output,
+        "-t",
+        "0.01",
+        "-A",
+    )
+    assert os.path.exists(output)
+
+    df = pandas.read_csv(output)
+    assert len(df) == 9
+
+    dd = df.to_dict(orient="index")
+    print(dd)
+
+    for idx, row in dd.items():
+        # identical?
+        if row["match_name"] == row["query_name"]:
+            assert row["query_md5"] == row["match_md5"], row
+            assert float(row["containment"] == 1.0)
+            assert float(row["jaccard"] == 1.0)
+            assert float(row["max_containment"] == 1.0)
+            assert float(row["query_containment_ani"] == 1.0)
+            assert float(row["match_containment_ani"] == 1.0)
+            assert float(row["average_containment_ani"] == 1.0)
+            assert float(row["max_containment_ani"] == 1.0)
+
+        else:
+            # confirm hand-checked numbers
+            q = row["query_name"].split()[0]
+            m = row["match_name"].split()[0]
+            cont = float(row["containment"])
+            jaccard = float(row["jaccard"])
+            maxcont = float(row["max_containment"])
+            intersect_hashes = int(row["intersect_hashes"])
+            query_ani = float(row["query_containment_ani"])
+            match_ani = float(row["match_containment_ani"])
+            average_ani = float(row["average_containment_ani"])
+            max_ani = float(row["max_containment_ani"])
+            jaccard = round(jaccard, 4)
+            cont = round(cont, 4)
+            maxcont = round(maxcont, 4)
+            query_ani = round(query_ani, 4)
+            match_ani = round(match_ani, 4)
+            average_ani = round(average_ani, 4)
+            max_ani = round(max_ani, 4)
+            print(
+                q,
+                m,
+                f"{jaccard:.04}",
+                f"{cont:.04}",
+                f"{maxcont:.04}",
+                f"{query_ani:.04}",
+                f"{match_ani:.04}",
+                f"{average_ani:.04}",
+                f"{max_ani:.04}",
+            )
+
+            if q == "NC_011665.1" and m == "NC_009661.1":
+                assert jaccard == 0.3207
+                assert cont == 0.4828
+                assert maxcont == 0.4885
+                assert intersect_hashes == 2529
+                assert query_ani == 0.9768
+                assert match_ani == 0.9772
+                assert average_ani == 0.977
+                assert max_ani == 0.9772
+            elif q == "NC_009661.1" and m == "NC_011665.1":
+                assert jaccard == 0.3207
+                assert cont == 0.4885
+                assert maxcont == 0.4885
+                assert intersect_hashes == 2529
+                assert query_ani == 0.9772
+                assert match_ani == 0.9768
+                assert average_ani == 0.977
+                assert max_ani == 0.9772
+            else:
+                assert jaccard == 0
+                assert cont == 0
+                assert maxcont == 0
+                assert intersect_hashes == 0
+                assert query_ani == 0
+                assert match_ani == 0
+                assert average_ani == 0
+                assert max_ani == 0
+
+
 def test_simple_abund(runtmp):
     # test with abund sig
     sig2 = get_test_data("2.fa.sig.gz")
diff --git a/src/python/tests/test_multisearch.py b/src/python/tests/test_multisearch.py
index dfc65ee2..7b82dc33 100644
--- a/src/python/tests/test_multisearch.py
+++ b/src/python/tests/test_multisearch.py
@@ -91,6 +91,80 @@ def test_simple_no_ani(runtmp, zip_query, zip_db):
                 assert intersect_hashes == 2529
 
 
+def test_simple_no_ani_output_all(runtmp, zip_query, zip_db):
+    # test basic execution!
+    query_list = runtmp.output("query.txt")
+    against_list = runtmp.output("against.txt")
+
+    sig2 = get_test_data("2.fa.sig.gz")
+    sig47 = get_test_data("47.fa.sig.gz")
+    sig63 = get_test_data("63.fa.sig.gz")
+
+    make_file_list(query_list, [sig2, sig47, sig63])
+    make_file_list(against_list, [sig2, sig47, sig63])
+
+    output = runtmp.output("out.csv")
+
+    if zip_db:
+        against_list = zip_siglist(runtmp, against_list, runtmp.output("db.zip"))
+    if zip_query:
+        query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip"))
+
+    runtmp.sourmash(
+        "scripts", "multisearch", query_list, against_list, "-o", output, "-A"
+    )
+    assert os.path.exists(output)
+
+    df = pandas.read_csv(output)
+    assert len(df) == 9
+
+    dd = df.to_dict(orient="index")
+    print(dd)
+
+    for idx, row in dd.items():
+        assert not ("prob_overlap" in row)
+
+        # identical?
+        if row["match_name"] == row["query_name"]:
+            assert row["query_md5"] == row["match_md5"], row
+            assert float(row["containment"] == 1.0)
+            assert float(row["jaccard"] == 1.0)
+            assert float(row["max_containment"] == 1.0)
+            assert "query_containment_ani" not in row
+            assert "match_containment_ani" not in row
+            assert "average_containment_ani" not in row
+            assert "max_containment_ani" not in row
+
+        else:
+            # confirm hand-checked numbers
+            q = row["query_name"].split()[0]
+            m = row["match_name"].split()[0]
+            cont = float_round(row["containment"], 4)
+            jaccard = float_round(row["jaccard"], 4)
+            maxcont = float_round(row["max_containment"], 4)
+
+            intersect_hashes = int(row["intersect_hashes"])
+
+            print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}")
+
+            if q == "NC_011665.1" and m == "NC_009661.1":
+                assert jaccard == 0.3207
+                assert cont == 0.4828
+                assert maxcont == 0.4885
+                assert intersect_hashes == 2529
+
+            elif q == "NC_009661.1" and m == "NC_011665.1":
+                assert jaccard == 0.3207
+                assert cont == 0.4885
+                assert maxcont == 0.4885
+                assert intersect_hashes == 2529
+            else:
+                assert jaccard == 0
+                assert cont == 0
+                assert maxcont == 0
+                assert intersect_hashes == 0
+
+
 def test_simple_prob_overlap(runtmp, zip_query, zip_db, indexed_query, indexed_against):
     # test basic execution!
     query_list = runtmp.output("query.txt")
diff --git a/src/python/tests/test_pairwise.py b/src/python/tests/test_pairwise.py
index 1a940043..1bf469bf 100644
--- a/src/python/tests/test_pairwise.py
+++ b/src/python/tests/test_pairwise.py
@@ -20,6 +20,17 @@ def test_installed(runtmp):
     assert "usage:  pairwise" in runtmp.last_result.err
 
 
+def test_on_dir(runtmp, capfd):
+    with pytest.raises(utils.SourmashCommandFailed):
+        runtmp.sourmash(
+            "scripts", "pairwise", runtmp.output(""), "-o", runtmp.output("xxx.csv")
+        )
+
+    captured = capfd.readouterr()
+    print(captured.err)
+    assert "arbitrary directories are not supported" in captured.err
+
+
 def test_simple_no_ani(runtmp, capfd, zip_query, indexed):
     # test basic execution!
     query_list = runtmp.output("query.txt")
@@ -80,6 +91,31 @@ def test_simple_no_ani(runtmp, capfd, zip_query, indexed):
         )
 
 
+def test_simple_no_ani_output_all(runtmp, capfd, zip_query, indexed):
+    # test basic execution!
+    query_list = runtmp.output("query.txt")
+
+    sig2 = get_test_data("2.fa.sig.gz")
+    sig47 = get_test_data("47.fa.sig.gz")
+    sig63 = get_test_data("63.fa.sig.gz")
+
+    make_file_list(query_list, [sig2, sig47, sig63])
+
+    output = runtmp.output("out.csv")
+
+    if zip_query:
+        query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip"))
+
+    if indexed:
+        query_list = index_siglist(runtmp, query_list, runtmp.output("db"))
+
+    runtmp.sourmash("scripts", "pairwise", query_list, "-o", output, "-t", "-1", "-A")
+    assert os.path.exists(output)
+
+    df = pandas.read_csv(output)
+    assert len(df) == 6
+
+
 def test_simple_ani(runtmp, zip_query):
     # test basic execution!
     query_list = runtmp.output("query.txt")
@@ -147,6 +183,26 @@ def test_simple_ani(runtmp, zip_query):
             assert q2_ani == 0.9772
             assert avg_ani == 0.977
             assert max_ani == 0.9772
+        elif m == "NC_011665.1" and q == "NC_009661.1":
+            assert jaccard == 0.3207
+            assert cont == 0.4885
+            assert maxcont == 0.4885
+            assert intersect_hashes == 2529
+            assert q2_ani == 0.9768
+            assert q1_ani == 0.9772
+            assert avg_ani == 0.977
+            assert max_ani == 0.9772
+        elif q == m:
+            assert jaccard == 1
+        else:
+            assert jaccard == 0
+            assert cont == 0
+            assert maxcont == 0
+            assert intersect_hashes == 0
+            assert q1_ani == 0
+            assert q2_ani == 0
+            assert avg_ani == 0
+            assert max_ani == 0
 
 
 def test_simple_threshold(runtmp, zip_query):
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 3f8ffc79..5674d13d 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -14,7 +14,7 @@ use serde::{Deserialize, Serialize};
 // use rust_decimal::{MathematicalOps, Decimal};
 use std::cmp::{Ordering, PartialOrd};
 use std::collections::BinaryHeap;
-use std::fs::{create_dir_all, File};
+use std::fs::{create_dir_all, metadata, File};
 use std::io::{BufWriter, Write};
 use std::panic;
 use std::sync::atomic;
@@ -33,7 +33,7 @@ use std::collections::{HashMap, HashSet};
 use std::hash::{Hash, Hasher};
 
 pub mod multicollection;
-use multicollection::MultiCollection;
+pub use multicollection::{MultiCollection, SmallSignature};
 
 pub mod buildutils;
 use buildutils::{BuildCollection, BuildManifest};
@@ -555,6 +555,14 @@ pub fn load_collection(
         }
     });
 
+    // we support RocksDB directory paths, but nothing else, unlike sourmash.
+    if collection.is_none() {
+        let path_metadata = metadata(sigpath.clone()).expect("getting path metadata failed");
+        if path_metadata.is_dir() {
+            bail!("arbitrary directories are not supported as input");
+        }
+    }
+
     let collection =
         collection.or_else(
             || match MultiCollection::from_standalone_manifest(&sigpath) {
@@ -617,7 +625,7 @@ pub fn load_collection(
 ///
 /// # Arguments
 ///
-/// * `sketchlist` - A slice of loaded `SmallSignature` sketches.
+/// * `collection` - A MultiCollection.
 /// * `skipped_paths` - # paths that contained no compatible sketches.
 /// * `failed_paths` - # paths that failed to load.
 /// * `report_type` - ReportType Enum (Query or Against). Used to specify
@@ -999,7 +1007,7 @@ pub fn is_revindex_database(path: &camino::Utf8PathBuf) -> bool {
 }
 
 #[derive(Serialize)]
-pub struct SearchResult {
+pub struct ManySearchResult {
     pub query_name: String,
     pub query_md5: String,
     pub match_name: String,

From 9d99130590485a705fa05d9c2a15554b5d423371 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 19 Dec 2024 23:01:41 +0000
Subject: [PATCH 5/5] Bump pyo3 from 0.23.2 to 0.23.3 (#540)

Bumps [pyo3](https://github.com/pyo3/pyo3) from 0.23.2 to 0.23.3.
- [Release notes](https://github.com/pyo3/pyo3/releases)
- [Changelog](https://github.com/PyO3/pyo3/blob/main/CHANGELOG.md)
- [Commits](https://github.com/pyo3/pyo3/compare/v0.23.2...v0.23.3)

---
updated-dependencies:
- dependency-name: pyo3
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock | 22 +++++++++++-----------
 Cargo.toml |  2 +-
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 933b5a90..9cf74d5b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -206,7 +206,7 @@ dependencies = [
  "bitflags",
  "cexpr",
  "clang-sys",
- "itertools 0.11.0",
+ "itertools 0.12.1",
  "lazy_static",
  "lazycell",
  "proc-macro2",
@@ -1353,9 +1353,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3"
-version = "0.23.2"
+version = "0.23.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f54b3d09cbdd1f8c20650b28e7b09e338881482f4aa908a5f61a00c98fba2690"
+checksum = "e484fd2c8b4cb67ab05a318f1fd6fa8f199fcc30819f08f07d200809dba26c15"
 dependencies = [
  "anyhow",
  "cfg-if",
@@ -1372,9 +1372,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-build-config"
-version = "0.23.2"
+version = "0.23.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3015cf985888fe66cfb63ce0e321c603706cd541b7aec7ddd35c281390af45d8"
+checksum = "dc0e0469a84f208e20044b98965e1561028180219e35352a2afaf2b942beff3b"
 dependencies = [
  "once_cell",
  "target-lexicon",
@@ -1382,9 +1382,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-ffi"
-version = "0.23.2"
+version = "0.23.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6fca7cd8fd809b5ac4eefb89c1f98f7a7651d3739dfb341ca6980090f554c270"
+checksum = "eb1547a7f9966f6f1a0f0227564a9945fe36b90da5a93b3933fc3dc03fae372d"
 dependencies = [
  "libc",
  "pyo3-build-config",
@@ -1392,9 +1392,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-macros"
-version = "0.23.2"
+version = "0.23.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34e657fa5379a79151b6ff5328d9216a84f55dc93b17b08e7c3609a969b73aa0"
+checksum = "fdb6da8ec6fa5cedd1626c886fc8749bdcbb09424a86461eb8cdf096b7c33257"
 dependencies = [
  "proc-macro2",
  "pyo3-macros-backend",
@@ -1404,9 +1404,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-macros-backend"
-version = "0.23.2"
+version = "0.23.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "295548d5ffd95fd1981d2d3cf4458831b21d60af046b729b6fd143b0ba7aee2f"
+checksum = "38a385202ff5a92791168b1136afae5059d3ac118457bb7bc304c197c2d33e7d"
 dependencies = [
  "heck 0.5.0",
  "proc-macro2",
diff --git a/Cargo.toml b/Cargo.toml
index 4a845512..24241e9f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,7 +9,7 @@ name = "sourmash_plugin_branchwater"
 crate-type = ["cdylib"]
 
 [dependencies]
-pyo3 = { version = "0.23.2", features = ["extension-module", "anyhow"] }
+pyo3 = { version = "0.23.3", features = ["extension-module", "anyhow"] }
 rayon = "1.10.0"
 serde = { version = "1.0.216", features = ["derive"] }
 sourmash = { version = "0.17.2", features = ["branchwater"] }