From 31d305629b2c4d63817589d51664e78a73c621f0 Mon Sep 17 00:00:00 2001 From: Olga Botvinnik Date: Tue, 29 Oct 2024 10:21:47 -0700 Subject: [PATCH] Python Black Formatting (#492) * Apply black formatting * Add black.yml workflow for checking Python formatting --- .github/workflows/black.yml | 13 + .../sourmash_plugin_branchwater/__init__.py | 814 +++++--- .../prettyprint.py | 27 +- src/python/tests/__init__.py | 1 + src/python/tests/conftest.py | 7 + src/python/tests/sourmash_tst_utils.py | 90 +- src/python/tests/test_cluster.py | 555 +++-- src/python/tests/test_fastgather.py | 1266 ++++++++---- src/python/tests/test_fastmultigather.py | 1838 ++++++++++++----- src/python/tests/test_index.py | 404 ++-- src/python/tests/test_manysearch.py | 1039 ++++++---- src/python/tests/test_multisearch.py | 978 +++++---- src/python/tests/test_pairwise.py | 601 +++--- src/python/tests/test_sketch.py | 969 ++++++--- 14 files changed, 5452 insertions(+), 3150 deletions(-) create mode 100644 .github/workflows/black.yml diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml new file mode 100644 index 00000000..f29d30a2 --- /dev/null +++ b/.github/workflows/black.yml @@ -0,0 +1,13 @@ +name: Lint + +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: psf/black@stable + with: + src: "./src/python" + options: "--check --verbose" \ No newline at end of file diff --git a/src/python/sourmash_plugin_branchwater/__init__.py b/src/python/sourmash_plugin_branchwater/__init__.py index 2efc0bc6..116d2072 100755 --- a/src/python/sourmash_plugin_branchwater/__init__.py +++ b/src/python/sourmash_plugin_branchwater/__init__.py @@ -11,17 +11,20 @@ __version__ = importlib.metadata.version("sourmash_plugin_branchwater") + def print_version(): - notify(f"=> sourmash_plugin_branchwater {__version__}; cite Irber et al., doi: 10.1101/2022.11.02.514947\n") + notify( + f"=> sourmash_plugin_branchwater {__version__}; cite Irber et al., doi: 10.1101/2022.11.02.514947\n" + ) def get_max_cores(): try: - if 'SLURM_CPUS_ON_NODE' in os.environ: - return int(os.environ['SLURM_CPUS_ON_NODE']) - elif 'SLURM_JOB_CPUS_PER_NODE' in os.environ: - cpus_per_node_str = os.environ['SLURM_JOB_CPUS_PER_NODE'] - return int(cpus_per_node_str.split('x')[0]) + if "SLURM_CPUS_ON_NODE" in os.environ: + return int(os.environ["SLURM_CPUS_ON_NODE"]) + elif "SLURM_JOB_CPUS_PER_NODE" in os.environ: + cpus_per_node_str = os.environ["SLURM_JOB_CPUS_PER_NODE"] + return int(cpus_per_node_str.split("x")[0]) else: return os.cpu_count() except Exception: @@ -32,58 +35,101 @@ def set_thread_pool(user_cores): avail_threads = get_max_cores() num_threads = min(avail_threads, user_cores) if user_cores else avail_threads if user_cores and user_cores > avail_threads: - notify(f"warning: only {avail_threads} threads available, using {avail_threads}") + notify( + f"warning: only {avail_threads} threads available, using {avail_threads}" + ) actual_rayon_cores = sourmash_plugin_branchwater.set_global_thread_pool(num_threads) return actual_rayon_cores class Branchwater_Manysearch(CommandLinePlugin): - command = 'manysearch' - description = 'search many metagenomes for contained genomes' + command = "manysearch" + description = "search many metagenomes for contained genomes" def __init__(self, p): super().__init__(p) - p.add_argument('query_paths', - help="input file of sketches") - p.add_argument('against_paths', - help="input file of sketches") - p.add_argument('-o', '--output', required=True, - help='CSV 
output file for matches') - p.add_argument('-t', '--threshold', default=0.01, type=float, - help='containment threshold for reporting matches (default: 0.01)') - p.add_argument('-k', '--ksize', default=31, type=int, - help='k-mer size at which to select sketches') - p.add_argument('-s', '--scaled', default=1000, type=int, - help='scaled factor at which to do comparisons') - p.add_argument('-m', '--moltype', default='DNA', choices = ["DNA", "protein", "dayhoff", "hp"], - help = 'molecule type (DNA, protein, dayhoff, or hp; default DNA)') - p.add_argument('-c', '--cores', default=0, type=int, - help='number of cores to use (default is all available)') - p.add_argument('-P', '--pretty-print', action='store_true', - default=True, - help="display results after search finishes (default: True)") - p.add_argument('-N', '--no-pretty-print', action='store_false', - dest='pretty_print', - help="do not display results (e.g. for large output)") - p.add_argument('--ignore-abundance', action='store_true', - help="do not do expensive abundance calculations") + p.add_argument("query_paths", help="input file of sketches") + p.add_argument("against_paths", help="input file of sketches") + p.add_argument( + "-o", "--output", required=True, help="CSV output file for matches" + ) + p.add_argument( + "-t", + "--threshold", + default=0.01, + type=float, + help="containment threshold for reporting matches (default: 0.01)", + ) + p.add_argument( + "-k", + "--ksize", + default=31, + type=int, + help="k-mer size at which to select sketches", + ) + p.add_argument( + "-s", + "--scaled", + default=1000, + type=int, + help="scaled factor at which to do comparisons", + ) + p.add_argument( + "-m", + "--moltype", + default="DNA", + choices=["DNA", "protein", "dayhoff", "hp"], + help="molecule type (DNA, protein, dayhoff, or hp; default DNA)", + ) + p.add_argument( + "-c", + "--cores", + default=0, + type=int, + help="number of cores to use (default is all available)", + ) + p.add_argument( + "-P", + "--pretty-print", + action="store_true", + default=True, + help="display results after search finishes (default: True)", + ) + p.add_argument( + "-N", + "--no-pretty-print", + action="store_false", + dest="pretty_print", + help="do not display results (e.g. for large output)", + ) + p.add_argument( + "--ignore-abundance", + action="store_true", + help="do not do expensive abundance calculations", + ) def main(self, args): print_version() - notify(f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold: {args.threshold}") + notify( + f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold: {args.threshold}" + ) num_threads = set_thread_pool(args.cores) - notify(f"searching all sketches in '{args.query_paths}' against '{args.against_paths}' using {num_threads} threads") + notify( + f"searching all sketches in '{args.query_paths}' against '{args.against_paths}' using {num_threads} threads" + ) super().main(args) - status = sourmash_plugin_branchwater.do_manysearch(args.query_paths, - args.against_paths, - args.threshold, - args.ksize, - args.scaled, - args.moltype, - args.output, - args.ignore_abundance) + status = sourmash_plugin_branchwater.do_manysearch( + args.query_paths, + args.against_paths, + args.threshold, + args.ksize, + args.scaled, + args.moltype, + args.output, + args.ignore_abundance, + ) if status == 0: notify(f"...manysearch is done! 
results in '{args.output}'") @@ -93,46 +139,80 @@ def main(self, args): class Branchwater_Fastgather(CommandLinePlugin): - command = 'fastgather' - description = 'massively parallel sketch gather' + command = "fastgather" + description = "massively parallel sketch gather" def __init__(self, p): super().__init__(p) - p.add_argument('query_sig', help="metagenome sketch") - p.add_argument('against_paths', help="input file of sketches") - p.add_argument('-o', '--output-gather', required=True, - help="save gather output (minimum metagenome cover) to this file") - p.add_argument('--output-prefetch', - help="save prefetch output (all overlaps) to this file") - p.add_argument('-t', '--threshold-bp', default=50000, type=float, - help='threshold in estimated base pairs, for reporting matches (default: 50kb)') - p.add_argument('-k', '--ksize', default=31, type=int, - help='k-mer size at which to do comparisons (default: 31)') - p.add_argument('-s', '--scaled', default=1000, type=int, - help='scaled factor at which to do comparisons (default: 1000)') - p.add_argument('-m', '--moltype', default='DNA', choices = ["DNA", "protein", "dayhoff", "hp"], - help = 'molecule type (DNA, protein, dayhoff, or hp; default DNA)') - p.add_argument('-c', '--cores', default=0, type=int, - help='number of cores to use (default is all available)') - + p.add_argument("query_sig", help="metagenome sketch") + p.add_argument("against_paths", help="input file of sketches") + p.add_argument( + "-o", + "--output-gather", + required=True, + help="save gather output (minimum metagenome cover) to this file", + ) + p.add_argument( + "--output-prefetch", help="save prefetch output (all overlaps) to this file" + ) + p.add_argument( + "-t", + "--threshold-bp", + default=50000, + type=float, + help="threshold in estimated base pairs, for reporting matches (default: 50kb)", + ) + p.add_argument( + "-k", + "--ksize", + default=31, + type=int, + help="k-mer size at which to do comparisons (default: 31)", + ) + p.add_argument( + "-s", + "--scaled", + default=1000, + type=int, + help="scaled factor at which to do comparisons (default: 1000)", + ) + p.add_argument( + "-m", + "--moltype", + default="DNA", + choices=["DNA", "protein", "dayhoff", "hp"], + help="molecule type (DNA, protein, dayhoff, or hp; default DNA)", + ) + p.add_argument( + "-c", + "--cores", + default=0, + type=int, + help="number of cores to use (default is all available)", + ) def main(self, args): print_version() - notify(f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold bp: {args.threshold_bp}") + notify( + f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold bp: {args.threshold_bp}" + ) num_threads = set_thread_pool(args.cores) - - notify(f"gathering all sketches in '{args.query_sig}' against '{args.against_paths}' using {num_threads} threads") + notify( + f"gathering all sketches in '{args.query_sig}' against '{args.against_paths}' using {num_threads} threads" + ) super().main(args) - status = sourmash_plugin_branchwater.do_fastgather(args.query_sig, - args.against_paths, - int(args.threshold_bp), - args.ksize, - args.scaled, - args.moltype, - args.output_gather, - args.output_prefetch) + status = sourmash_plugin_branchwater.do_fastgather( + args.query_sig, + args.against_paths, + int(args.threshold_bp), + args.ksize, + args.scaled, + args.moltype, + args.output_gather, + args.output_prefetch, + ) if status == 0: notify(f"...fastgather is done! 
gather results in '{args.output_gather}'") if args.output_prefetch: @@ -141,107 +221,182 @@ def main(self, args): class Branchwater_Fastmultigather(CommandLinePlugin): - command = 'fastmultigather' - description = 'massively parallel sketch multigather' + command = "fastmultigather" + description = "massively parallel sketch multigather" def __init__(self, p): super().__init__(p) - p.add_argument('query_paths', help="input file of sketches to query") - p.add_argument('against_paths', help="input file of sketches to search against \ - OR a branchwater indexed database generated with 'sourmash scripts index'") - p.add_argument('-t', '--threshold-bp', default=50000, type=float, - help='threshold in estimated base pairs, for reporting matches (default: 50kb)') - p.add_argument('-k', '--ksize', default=31, type=int, - help='k-mer size at which to do comparisons (default: 31)') - p.add_argument('-s', '--scaled', default=1000, type=int, - help='scaled factor at which to do comparisons (default: 1000)') - p.add_argument('-m', '--moltype', default='DNA', choices = ["DNA", "protein", "dayhoff", "hp"], - help = 'molecule type (DNA, protein, dayhoff, or hp; default DNA)') - p.add_argument('-c', '--cores', default=0, type=int, - help='number of cores to use (default is all available)') - p.add_argument('-o', '--output', help='CSV output file for matches. Used for non-rocksdb searches only.') - p.add_argument('--create-empty-results', action = 'store_true', - default=False, help='create empty results file(s) even if no matches') - p.add_argument('--save-matches', action='store_true', - default=False, help='save matched hashes for every input to a signature') - + p.add_argument("query_paths", help="input file of sketches to query") + p.add_argument( + "against_paths", + help="input file of sketches to search against \ + OR a branchwater indexed database generated with 'sourmash scripts index'", + ) + p.add_argument( + "-t", + "--threshold-bp", + default=50000, + type=float, + help="threshold in estimated base pairs, for reporting matches (default: 50kb)", + ) + p.add_argument( + "-k", + "--ksize", + default=31, + type=int, + help="k-mer size at which to do comparisons (default: 31)", + ) + p.add_argument( + "-s", + "--scaled", + default=1000, + type=int, + help="scaled factor at which to do comparisons (default: 1000)", + ) + p.add_argument( + "-m", + "--moltype", + default="DNA", + choices=["DNA", "protein", "dayhoff", "hp"], + help="molecule type (DNA, protein, dayhoff, or hp; default DNA)", + ) + p.add_argument( + "-c", + "--cores", + default=0, + type=int, + help="number of cores to use (default is all available)", + ) + p.add_argument( + "-o", + "--output", + help="CSV output file for matches. 
Used for non-rocksdb searches only.", + ) + p.add_argument( + "--create-empty-results", + action="store_true", + default=False, + help="create empty results file(s) even if no matches", + ) + p.add_argument( + "--save-matches", + action="store_true", + default=False, + help="save matched hashes for every input to a signature", + ) def main(self, args): print_version() - notify(f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold bp: {args.threshold_bp} / save matches: {args.save_matches}") + notify( + f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold bp: {args.threshold_bp} / save matches: {args.save_matches}" + ) num_threads = set_thread_pool(args.cores) - notify(f"gathering all sketches in '{args.query_paths}' against '{args.against_paths}' using {num_threads} threads") + notify( + f"gathering all sketches in '{args.query_paths}' against '{args.against_paths}' using {num_threads} threads" + ) super().main(args) - status = sourmash_plugin_branchwater.do_fastmultigather(args.query_paths, - args.against_paths, - int(args.threshold_bp), - args.ksize, - args.scaled, - args.moltype, - args.output, - args.save_matches, - args.create_empty_results - ) + status = sourmash_plugin_branchwater.do_fastmultigather( + args.query_paths, + args.against_paths, + int(args.threshold_bp), + args.ksize, + args.scaled, + args.moltype, + args.output, + args.save_matches, + args.create_empty_results, + ) if status == 0: notify(f"...fastmultigather is done!") return status class Branchwater_Index(CommandLinePlugin): - command = 'index' - description = 'Build Branchwater RevIndex' + command = "index" + description = "Build Branchwater RevIndex" def __init__(self, p): super().__init__(p) - p.add_argument('siglist', - help="input file of sketches") - p.add_argument('-o', '--output', required=True, - help='output file for the index') - p.add_argument('-k', '--ksize', default=31, type=int, - help='k-mer size at which to select sketches') - p.add_argument('-s', '--scaled', default=1000, type=int, - help='scaled factor at which to do comparisons') - p.add_argument('-m', '--moltype', default='DNA', choices = ["DNA", "protein", "dayhoff", "hp"], - help = 'molecule type (DNA, protein, dayhoff, or hp; default DNA)') - p.add_argument('-c', '--cores', default=0, type=int, - help='number of cores to use (default is all available)') - p.add_argument('--internal-storage', default=True, action='store_true', - help="build indexes that contain sketches and are relocatable (default: True)") - p.add_argument('--no-internal-storage', '--no-store-sketches', - action='store_false', - help="do not store sketches in the index; index may not be relocatable (default: False)", - dest='internal_storage') + p.add_argument("siglist", help="input file of sketches") + p.add_argument( + "-o", "--output", required=True, help="output file for the index" + ) + p.add_argument( + "-k", + "--ksize", + default=31, + type=int, + help="k-mer size at which to select sketches", + ) + p.add_argument( + "-s", + "--scaled", + default=1000, + type=int, + help="scaled factor at which to do comparisons", + ) + p.add_argument( + "-m", + "--moltype", + default="DNA", + choices=["DNA", "protein", "dayhoff", "hp"], + help="molecule type (DNA, protein, dayhoff, or hp; default DNA)", + ) + p.add_argument( + "-c", + "--cores", + default=0, + type=int, + help="number of cores to use (default is all available)", + ) + p.add_argument( + "--internal-storage", + default=True, + action="store_true", + help="build 
indexes that contain sketches and are relocatable (default: True)", + ) + p.add_argument( + "--no-internal-storage", + "--no-store-sketches", + action="store_false", + help="do not store sketches in the index; index may not be relocatable (default: False)", + dest="internal_storage", + ) def main(self, args): - notify(f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} ") + notify( + f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} " + ) num_threads = set_thread_pool(args.cores) notify(f"indexing all sketches in '{args.siglist}'") super().main(args) - status = sourmash_plugin_branchwater.do_index(args.siglist, - args.ksize, - args.scaled, - args.moltype, - args.output, - False, # colors - currently must be false? - args.internal_storage) + status = sourmash_plugin_branchwater.do_index( + args.siglist, + args.ksize, + args.scaled, + args.moltype, + args.output, + False, # colors - currently must be false? + args.internal_storage, + ) if status == 0: notify(f"...index is done! results in '{args.output}'") return status + class Branchwater_Check(CommandLinePlugin): - command = 'check' - description = 'Check Branchwater RevIndex' + command = "check" + description = "Check Branchwater RevIndex" def __init__(self, p): super().__init__(p) - p.add_argument('index', - help="RocksDB index file created with 'index'") - p.add_argument('--quick', action='store_true') + p.add_argument("index", help="RocksDB index file created with 'index'") + p.add_argument("--quick", action="store_true") def main(self, args): notify(f"checking index '{args.index}'") @@ -253,109 +408,191 @@ def main(self, args): class Branchwater_Multisearch(CommandLinePlugin): - command = 'multisearch' - description = 'massively parallel in-memory sketch search' + command = "multisearch" + description = "massively parallel in-memory sketch search" def __init__(self, p): super().__init__(p) - p.add_argument('query_paths', - help="input file of sketches") - p.add_argument('against_paths', - help="input file of sketches") - p.add_argument('-o', '--output', required=True, - help='CSV output file for matches') - p.add_argument('-t', '--threshold', default=0.01, type=float, - help='containment threshold for reporting matches (default: 0.01)') - p.add_argument('-k', '--ksize', default=31, type=int, - help='k-mer size at which to select sketches') - p.add_argument('-s', '--scaled', default=1000, type=int, - help='scaled factor at which to do comparisons') - p.add_argument('-m', '--moltype', default='DNA', choices = ["DNA", "protein", "dayhoff", "hp"], - help = 'molecule type (DNA, protein, dayhoff, or hp; default DNA)') - p.add_argument('-c', '--cores', default=0, type=int, - help='number of cores to use (default is all available)') - p.add_argument('-a', '--ani', action='store_true', - help='estimate ANI from containment') + p.add_argument("query_paths", help="input file of sketches") + p.add_argument("against_paths", help="input file of sketches") + p.add_argument( + "-o", "--output", required=True, help="CSV output file for matches" + ) + p.add_argument( + "-t", + "--threshold", + default=0.01, + type=float, + help="containment threshold for reporting matches (default: 0.01)", + ) + p.add_argument( + "-k", + "--ksize", + default=31, + type=int, + help="k-mer size at which to select sketches", + ) + p.add_argument( + "-s", + "--scaled", + default=1000, + type=int, + help="scaled factor at which to do comparisons", + ) + p.add_argument( + "-m", + "--moltype", + default="DNA", + choices=["DNA", 
"protein", "dayhoff", "hp"], + help="molecule type (DNA, protein, dayhoff, or hp; default DNA)", + ) + p.add_argument( + "-c", + "--cores", + default=0, + type=int, + help="number of cores to use (default is all available)", + ) + p.add_argument( + "-a", "--ani", action="store_true", help="estimate ANI from containment" + ) def main(self, args): print_version() - notify(f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold: {args.threshold}") + notify( + f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold: {args.threshold}" + ) num_threads = set_thread_pool(args.cores) - notify(f"searching all sketches in '{args.query_paths}' against '{args.against_paths}' using {num_threads} threads") + notify( + f"searching all sketches in '{args.query_paths}' against '{args.against_paths}' using {num_threads} threads" + ) super().main(args) - status = sourmash_plugin_branchwater.do_multisearch(args.query_paths, - args.against_paths, - args.threshold, - args.ksize, - args.scaled, - args.moltype, - args.ani, - args.output) + status = sourmash_plugin_branchwater.do_multisearch( + args.query_paths, + args.against_paths, + args.threshold, + args.ksize, + args.scaled, + args.moltype, + args.ani, + args.output, + ) if status == 0: notify(f"...multisearch is done! results in '{args.output}'") return status - + + class Branchwater_Pairwise(CommandLinePlugin): - command = 'pairwise' - description = 'massively parallel in-memory pairwise comparisons' + command = "pairwise" + description = "massively parallel in-memory pairwise comparisons" def __init__(self, p): super().__init__(p) - p.add_argument('sig_paths', - help="input file of sketches") - p.add_argument('-o', '--output', required=True, - help='CSV output file for matches') - p.add_argument('-t', '--threshold', default=0.01, type=float, - help='containment threshold for reporting matches') - p.add_argument('-k', '--ksize', default=31, type=int, - help='k-mer size at which to select sketches') - p.add_argument('-s', '--scaled', default=1000, type=int, - help='scaled factor at which to do comparisons') - p.add_argument('-m', '--moltype', default='DNA', choices = ["DNA", "protein", "dayhoff", "hp"], - help = 'molecule type (DNA, protein, dayhoff, or hp; default DNA)') - p.add_argument('-c', '--cores', default=0, type=int, - help='number of cores to use (default is all available)') - p.add_argument('-a', '--ani', action='store_true', - help='estimate ANI from containment') - p.add_argument('--write-all', action="store_true", - help="write self comparisons for all sketches") + p.add_argument("sig_paths", help="input file of sketches") + p.add_argument( + "-o", "--output", required=True, help="CSV output file for matches" + ) + p.add_argument( + "-t", + "--threshold", + default=0.01, + type=float, + help="containment threshold for reporting matches", + ) + p.add_argument( + "-k", + "--ksize", + default=31, + type=int, + help="k-mer size at which to select sketches", + ) + p.add_argument( + "-s", + "--scaled", + default=1000, + type=int, + help="scaled factor at which to do comparisons", + ) + p.add_argument( + "-m", + "--moltype", + default="DNA", + choices=["DNA", "protein", "dayhoff", "hp"], + help="molecule type (DNA, protein, dayhoff, or hp; default DNA)", + ) + p.add_argument( + "-c", + "--cores", + default=0, + type=int, + help="number of cores to use (default is all available)", + ) + p.add_argument( + "-a", "--ani", action="store_true", help="estimate ANI from containment" + ) + p.add_argument( 
+ "--write-all", + action="store_true", + help="write self comparisons for all sketches", + ) def main(self, args): print_version() - notify(f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold: {args.threshold}") + notify( + f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold: {args.threshold}" + ) num_threads = set_thread_pool(args.cores) - notify(f"pairwise-comparing all sketches in '{args.sig_paths}' using {num_threads} threads") + notify( + f"pairwise-comparing all sketches in '{args.sig_paths}' using {num_threads} threads" + ) super().main(args) - status = sourmash_plugin_branchwater.do_pairwise(args.sig_paths, - args.threshold, - args.ksize, - args.scaled, - args.moltype, - args.ani, - args.write_all, - args.output) + status = sourmash_plugin_branchwater.do_pairwise( + args.sig_paths, + args.threshold, + args.ksize, + args.scaled, + args.moltype, + args.ani, + args.write_all, + args.output, + ) if status == 0: notify(f"...pairwise is done! results in '{args.output}'") return status + class Branchwater_SingleSketch(CommandLinePlugin): - command = 'singlesketch' - description = 'sketch a single sequence file' + command = "singlesketch" + description = "sketch a single sequence file" def __init__(self, p): super().__init__(p) - p.add_argument('input_filename', help="input FASTA file or '-' for stdin") - p.add_argument('-o', '--output', required=True, - help='output file for the signature or - for stdout') - p.add_argument('-p', '--param-string', action='append', type=str, default=[], - help='parameter string for sketching (default: k=31,scaled=1000)') - p.add_argument('-n', '--name', help="optional name for the signature, default is the basename of input path") + p.add_argument("input_filename", help="input FASTA file or '-' for stdin") + p.add_argument( + "-o", + "--output", + required=True, + help="output file for the signature or - for stdout", + ) + p.add_argument( + "-p", + "--param-string", + action="append", + type=str, + default=[], + help="parameter string for sketching (default: k=31,scaled=1000)", + ) + p.add_argument( + "-n", + "--name", + help="optional name for the signature, default is the basename of input path", + ) def main(self, args): print_version() @@ -378,38 +615,70 @@ def main(self, args): args.param_string = "_".join(updated_param_strings).lower() # If --name is not provided, default to input_filename, but if the source file is -, set name to empty string - signature_name = args.name if args.name else os.path.basename(args.input_filename) if args.input_filename != "-" else "" - - notify(f"sketching file '{args.input_filename}' with params '{args.param_string}' and name '{signature_name}'") + signature_name = ( + args.name + if args.name + else ( + os.path.basename(args.input_filename) + if args.input_filename != "-" + else "" + ) + ) + + notify( + f"sketching file '{args.input_filename}' with params '{args.param_string}' and name '{signature_name}'" + ) super().main(args) - status = sourmash_plugin_branchwater.do_singlesketch(args.input_filename, - args.param_string, - args.output, - signature_name) # Pass the name to Rust + status = sourmash_plugin_branchwater.do_singlesketch( + args.input_filename, args.param_string, args.output, signature_name + ) # Pass the name to Rust if status == 0: notify(f"...singlesketch is done! 
results in '{args.output}'")
         return status


 class Branchwater_Manysketch(CommandLinePlugin):
-    command = 'manysketch'
-    description = 'massively parallel sketching'
+    command = "manysketch"
+    description = "massively parallel sketching"

     def __init__(self, p):
         super().__init__(p)
-        p.add_argument('fromfile_csv', help="a csv file containing paths to FASTA files. \
-                        Columns must be: 'name,genome_filename,protein_filename' or 'name,read1,read2'")
-        p.add_argument('-o', '--output', required=True,
-                        help='output zip file for the signatures')
-        p.add_argument('-p', '--param-string', action='append', type=str, default=[],
-                        help='parameter string for sketching (default: k=31,scaled=1000)')
-        p.add_argument('-c', '--cores', default=0, type=int,
-                        help='number of cores to use (default is all available)')
-        p.add_argument('-s', '--singleton', action="store_true",
-                        help='build one sketch per FASTA record, i.e. multiple sketches per FASTA file')
-        p.add_argument('-f', '--force', action="store_true",
-                        help='allow use of individual FASTA files in more than one sketch')
+        p.add_argument(
+            "fromfile_csv",
+            help="a csv file containing paths to FASTA files. \
+                        Columns must be: 'name,genome_filename,protein_filename' or 'name,read1,read2'",
+        )
+        p.add_argument(
+            "-o", "--output", required=True, help="output zip file for the signatures"
+        )
+        p.add_argument(
+            "-p",
+            "--param-string",
+            action="append",
+            type=str,
+            default=[],
+            help="parameter string for sketching (default: k=31,scaled=1000)",
+        )
+        p.add_argument(
+            "-c",
+            "--cores",
+            default=0,
+            type=int,
+            help="number of cores to use (default is all available)",
+        )
+        p.add_argument(
+            "-s",
+            "--singleton",
+            action="store_true",
+            help="build one sketch per FASTA record, i.e. multiple sketches per FASTA file",
+        )
+        p.add_argument(
+            "-f",
+            "--force",
+            action="store_true",
+            help="allow use of individual FASTA files in more than one sketch",
+        )

     def main(self, args):
         print_version()
@@ -424,50 +693,87 @@ def main(self, args):

         num_threads = set_thread_pool(args.cores)

-        notify(f"sketching all files in '{args.fromfile_csv}' using {num_threads} threads")
+        notify(
+            f"sketching all files in '{args.fromfile_csv}' using {num_threads} threads"
+        )

         super().main(args)
-        status = sourmash_plugin_branchwater.do_manysketch(args.fromfile_csv,
-                                                           args.param_string,
-                                                           args.output,
-                                                           args.singleton,
-                                                           args.force)
+        status = sourmash_plugin_branchwater.do_manysketch(
+            args.fromfile_csv,
+            args.param_string,
+            args.output,
+            args.singleton,
+            args.force,
+        )

         if status == 0:
             notify(f"...manysketch is done! results in '{args.output}'")
         return status


+
 class Branchwater_Cluster(CommandLinePlugin):
-    command = 'cluster'
+    command = "cluster"
     description = 'cluster from "pairwise" or "multisearch" results'

     def __init__(self, p):
         super().__init__(p)
-        p.add_argument('pairwise_csv', help="a csv file containing similarity information. \
-                        Currently, only a branchwater 'pairwise' or 'multisearch' file will work")
-        p.add_argument('-o', '--output', required=True,
-                        help='output csv file for the clusters')
-        p.add_argument('--cluster-sizes', default=None,
-                        help='output file for the cluster size histogram')
-        p.add_argument('--similarity-column', type=str, default='average_containment_ani',
-                        choices=['containment', 'max_containment', 'jaccard', 'average_containment_ani', 'max_containment_ani'],
-                        help='column to use as similarity measure')
-        p.add_argument('-t', '--threshold', type=float, default=0.95, help="similarity threshold for clustering. 
Default: 95%% ANI (0.95)") - p.add_argument('-c', '--cores', default=0, type=int, - help='number of cores to use (default is all available)') + p.add_argument( + "pairwise_csv", + help="a csv file containing similarity information. \ + Currently, only a branchwater 'pairwise' or 'multisearch' file will work", + ) + p.add_argument( + "-o", "--output", required=True, help="output csv file for the clusters" + ) + p.add_argument( + "--cluster-sizes", + default=None, + help="output file for the cluster size histogram", + ) + p.add_argument( + "--similarity-column", + type=str, + default="average_containment_ani", + choices=[ + "containment", + "max_containment", + "jaccard", + "average_containment_ani", + "max_containment_ani", + ], + help="column to use as similarity measure", + ) + p.add_argument( + "-t", + "--threshold", + type=float, + default=0.95, + help="similarity threshold for clustering. Default: 95%% ANI (0.95)", + ) + p.add_argument( + "-c", + "--cores", + default=0, + type=int, + help="number of cores to use (default is all available)", + ) def main(self, args): print_version() num_threads = set_thread_pool(args.cores) - notify(f"generating clusters for comparisons in '{args.pairwise_csv}' using {num_threads} threads") + notify( + f"generating clusters for comparisons in '{args.pairwise_csv}' using {num_threads} threads" + ) super().main(args) - status = sourmash_plugin_branchwater.do_cluster(args.pairwise_csv, - args.output, - args.similarity_column, - args.threshold, - args.cluster_sizes) + status = sourmash_plugin_branchwater.do_cluster( + args.pairwise_csv, + args.output, + args.similarity_column, + args.threshold, + args.cluster_sizes, + ) if status == 0: notify(f"...clustering is done! results in '{args.output}'") notify(f" cluster counts in '{args.cluster_sizes}'") diff --git a/src/python/sourmash_plugin_branchwater/prettyprint.py b/src/python/sourmash_plugin_branchwater/prettyprint.py index f948f792..3390d7f6 100644 --- a/src/python/sourmash_plugin_branchwater/prettyprint.py +++ b/src/python/sourmash_plugin_branchwater/prettyprint.py @@ -1,16 +1,17 @@ import csv + def pretty_print_manysearch(manysearch_csv): "Pretty-print the manysearch output." - with open(manysearch_csv, newline='') as fp: + with open(manysearch_csv, newline="") as fp: r = csv.DictReader(fp) rows = list(r) - rows.sort(key=lambda row: row['query_name']) # sort by metagenome, for now + rows.sort(key=lambda row: row["query_name"]) # sort by metagenome, for now first = True for row in rows: - has_abundance = 'average_abund' in row + has_abundance = "average_abund" in row # # display! 
@@ -23,21 +24,25 @@ def pretty_print_manysearch(manysearch_csv): print("-------- -------- --------- ------- ---------------") first = False - f_genome_found = float(row['containment']) + f_genome_found = float(row["containment"]) pct_genome = f"{f_genome_found*100:.1f}" if has_abundance: - n_weighted_found = int(row['n_weighted_found']) - total_weighted_hashes = int(row['total_weighted_hashes']) - f_metag_weighted = n_weighted_found / total_weighted_hashes # results_d['f_match_weighted'] + n_weighted_found = int(row["n_weighted_found"]) + total_weighted_hashes = int(row["total_weighted_hashes"]) + f_metag_weighted = ( + n_weighted_found / total_weighted_hashes + ) # results_d['f_match_weighted'] pct_metag = f"{f_metag_weighted*100:.1f}%" - avg_abund = float(row['average_abund']) + avg_abund = float(row["average_abund"]) avg_abund = f"{avg_abund:.1f}" else: avg_abund = "N/A" pct_metag = "N/A" - query_name = row['query_name'][:17] - metag_name = row['match_name'][:17] - print(f'{query_name:<17} {pct_genome:>6}% {avg_abund:>6} {pct_metag:>6} {metag_name}') + query_name = row["query_name"][:17] + metag_name = row["match_name"][:17] + print( + f"{query_name:<17} {pct_genome:>6}% {avg_abund:>6} {pct_metag:>6} {metag_name}" + ) diff --git a/src/python/tests/__init__.py b/src/python/tests/__init__.py index c1a35185..384ea8b8 100644 --- a/src/python/tests/__init__.py +++ b/src/python/tests/__init__.py @@ -1,2 +1,3 @@ from sourmash_plugin_branchwater import sourmash_plugin_branchwater + sourmash_plugin_branchwater.set_global_thread_pool(4) diff --git a/src/python/tests/conftest.py b/src/python/tests/conftest.py index f6f0f7f4..75a4b8f4 100644 --- a/src/python/tests/conftest.py +++ b/src/python/tests/conftest.py @@ -2,6 +2,7 @@ from .sourmash_tst_utils import TempDirectory, RunnerContext + @pytest.fixture def runtmp(): with TempDirectory() as location: @@ -12,26 +13,32 @@ def runtmp(): def toggle_internal_storage(request): return request.param + @pytest.fixture(params=[True, False]) def zip_query(request): return request.param + @pytest.fixture(params=[True, False]) def zip_db(request): return request.param + @pytest.fixture(params=[True, False]) def zip_against(request): return request.param + @pytest.fixture(params=[True, False]) def indexed(request): return request.param + @pytest.fixture(params=[True, False]) def indexed_query(request): return request.param + @pytest.fixture(params=[True, False]) def indexed_against(request): return request.param diff --git a/src/python/tests/sourmash_tst_utils.py b/src/python/tests/sourmash_tst_utils.py index 0c0e0e00..dabce721 100644 --- a/src/python/tests/sourmash_tst_utils.py +++ b/src/python/tests/sourmash_tst_utils.py @@ -16,31 +16,49 @@ def get_test_data(filename): thisdir = os.path.dirname(__file__) - return os.path.join(thisdir, 'test-data', filename) + return os.path.join(thisdir, "test-data", filename) def make_file_list(filename, paths): - with open(filename, 'wt') as fp: + with open(filename, "wt") as fp: fp.write("\n".join(paths)) fp.write("\n") def zip_siglist(runtmp, siglist, db): - runtmp.sourmash('sig', 'cat', siglist, - '-o', db) + runtmp.sourmash("sig", "cat", siglist, "-o", db) return db -def index_siglist(runtmp, siglist, db, *, ksize=31, scaled=1000, moltype='DNA', - toggle_internal_storage='--internal-storage'): +def index_siglist( + runtmp, + siglist, + db, + *, + ksize=31, + scaled=1000, + moltype="DNA", + toggle_internal_storage="--internal-storage", +): # build index - runtmp.sourmash('scripts', 'index', siglist, - '-o', db, '-k', 
str(ksize), '--scaled', str(scaled), - '--moltype', moltype, toggle_internal_storage) + runtmp.sourmash( + "scripts", + "index", + siglist, + "-o", + db, + "-k", + str(ksize), + "--scaled", + str(scaled), + "--moltype", + moltype, + toggle_internal_storage, + ) return db -def scriptpath(scriptname='sourmash'): +def scriptpath(scriptname="sourmash"): """Return the path to the scripts, in both dev and install situations.""" # note - it doesn't matter what the scriptname is here, as long as # it's some script present in this version of sourmash. @@ -53,7 +71,7 @@ def scriptpath(scriptname='sourmash'): if os.path.exists(os.path.join(path, scriptname)): return path - for path in os.environ['PATH'].split(':'): + for path in os.environ["PATH"].split(":"): if os.path.exists(os.path.join(path, scriptname)): return path @@ -61,10 +79,10 @@ def scriptpath(scriptname='sourmash'): def _runscript(scriptname): """Find & run a script with exec (i.e. not via os.system or subprocess).""" namespace = {"__name__": "__main__"} - namespace['sys'] = globals()['sys'] + namespace["sys"] = globals()["sys"] try: - pkg_resources.load_entry_point("sourmash", 'console_scripts', scriptname)() + pkg_resources.load_entry_point("sourmash", "console_scripts", scriptname)() return 0 except pkg_resources.ResolutionError: pass @@ -75,15 +93,15 @@ def _runscript(scriptname): if os.path.isfile(scriptfile): if os.path.isfile(scriptfile): exec( # pylint: disable=exec-used - compile(open(scriptfile).read(), scriptfile, 'exec'), - namespace) + compile(open(scriptfile).read(), scriptfile, "exec"), namespace + ) return 0 return -1 -ScriptResults = collections.namedtuple('ScriptResults', - ['status', 'out', 'err']) +ScriptResults = collections.namedtuple("ScriptResults", ["status", "out", "err"]) + def runscript(scriptname, args, **kwargs): """Run a Python script using exec(). @@ -99,8 +117,8 @@ def runscript(scriptname, args, **kwargs): sysargs.extend(args) cwd = os.getcwd() - in_directory = kwargs.get('in_directory', cwd) - fail_ok = kwargs.get('fail_ok', False) + in_directory = kwargs.get("in_directory", cwd) + fail_ok = kwargs.get("fail_ok", False) try: status = -1 @@ -108,8 +126,8 @@ def runscript(scriptname, args, **kwargs): sys.argv = sysargs oldin = None - if 'stdin_data' in kwargs: - oldin, sys.stdin = sys.stdin, StringIO(kwargs['stdin_data']) + if "stdin_data" in kwargs: + oldin, sys.stdin = sys.stdin, StringIO(kwargs["stdin_data"]) oldout, olderr = sys.stdout, sys.stderr sys.stdout = StringIO() @@ -119,8 +137,8 @@ def runscript(scriptname, args, **kwargs): os.chdir(in_directory) try: - print('running:', scriptname, 'in:', in_directory, file=oldout) - print('arguments', sysargs, file=oldout) + print("running:", scriptname, "in:", in_directory, file=oldout) + print("arguments", sysargs, file=oldout) status = _runscript(scriptname) except SystemExit as err: @@ -150,7 +168,7 @@ def runscript(scriptname, args, **kwargs): class TempDirectory(object): def __init__(self): - self.tempdir = tempfile.mkdtemp(prefix='sourmashtest_') + self.tempdir = tempfile.mkdtemp(prefix="sourmashtest_") def __enter__(self): return self.tempdir @@ -168,7 +186,7 @@ def __exit__(self, exc_type, exc_value, traceback): class SourmashCommandFailed(Exception): def __init__(self, msg): Exception.__init__(self, msg) - self.message = msg + self.message = msg class RunnerContext(object): @@ -181,6 +199,7 @@ class RunnerContext(object): You can use the 'output' method to build filenames in my temp directory. 
""" + def __init__(self, location): self.location = location self.last_command = None @@ -188,25 +207,26 @@ def __init__(self, location): def run_sourmash(self, *args, **kwargs): "Run the sourmash script with the given arguments." - kwargs['fail_ok'] = True - if 'in_directory' not in kwargs: - kwargs['in_directory'] = self.location + kwargs["fail_ok"] = True + if "in_directory" not in kwargs: + kwargs["in_directory"] = self.location - cmdlist = ['sourmash'] - cmdlist.extend(( str(x) for x in args)) + cmdlist = ["sourmash"] + cmdlist.extend((str(x) for x in args)) self.last_command = " ".join(cmdlist) - self.last_result = runscript('sourmash', args, **kwargs) + self.last_result = runscript("sourmash", args, **kwargs) if self.last_result.status: raise SourmashCommandFailed(self.last_result.err) return self.last_result + sourmash = run_sourmash def run(self, scriptname, *args, **kwargs): "Run a script with the given arguments." - if 'in_directory' not in kwargs: - kwargs['in_directory'] = self.location + if "in_directory" not in kwargs: + kwargs["in_directory"] = self.location self.last_command = " ".join(args) self.last_result = runscript(scriptname, args, **kwargs) return self.last_result @@ -224,11 +244,11 @@ def __str__(self): if self.last_result.out: s += "- stdout:\n---\n{}---\n".format(self.last_result.out) else: - s += '(no stdout)\n\n' + s += "(no stdout)\n\n" if self.last_result.err: s += "- stderr:\n---\n{}---\n".format(self.last_result.err) else: - s += '(no stderr)\n' + s += "(no stderr)\n" return s diff --git a/src/python/tests/test_cluster.py b/src/python/tests/test_cluster.py index 4ae12173..c986c1a2 100644 --- a/src/python/tests/test_cluster.py +++ b/src/python/tests/test_cluster.py @@ -7,14 +7,14 @@ def test_installed(runtmp): with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'cluster') + runtmp.sourmash("scripts", "cluster") - assert 'usage: cluster' in runtmp.last_result.err + assert "usage: cluster" in runtmp.last_result.err def test_cluster_help(runtmp): # test sourmash scripts cluster --help /-h - runtmp.sourmash('scripts', 'cluster', '-h') + runtmp.sourmash("scripts", "cluster", "-h") print(runtmp.last_result.err) out = runtmp.last_result.out @@ -25,170 +25,208 @@ def test_cluster_help(runtmp): assert "options:" in out -def test_cluster_containment(runtmp): - pairwise_csv = get_test_data('cluster.pairwise.csv') - output = runtmp.output('clusters.csv') - sizes = runtmp.output('sizes.csv') - threshold = '0.5' - - runtmp.sourmash('scripts', 'cluster', pairwise_csv, '-o', output, - '--similarity-column', "containment", "--cluster-sizes", - sizes, '--threshold', threshold) +def test_cluster_containment(runtmp): + pairwise_csv = get_test_data("cluster.pairwise.csv") + output = runtmp.output("clusters.csv") + sizes = runtmp.output("sizes.csv") + threshold = "0.5" + + runtmp.sourmash( + "scripts", + "cluster", + pairwise_csv, + "-o", + output, + "--similarity-column", + "containment", + "--cluster-sizes", + sizes, + "--threshold", + threshold, + ) assert os.path.exists(output) # check cluster output - with open(output, mode='r', newline='') as csvfile: + with open(output, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster','nodes'] + assert reader.fieldnames == ["cluster", "nodes"] assert len(rows) == 1, f"Expected 1 data row but found {len(rows)}" - assert rows[0]['cluster'] == 'Component_1' - expected = set("n2;n3;n7;n1;n6;n5;n4".split(';')) - assert 
set(rows[0]['nodes'].split(';')) == expected + assert rows[0]["cluster"] == "Component_1" + expected = set("n2;n3;n7;n1;n6;n5;n4".split(";")) + assert set(rows[0]["nodes"].split(";")) == expected # check cluster size histogram - with open(sizes, mode='r', newline='') as csvfile: + with open(sizes, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster_size','count'] + assert reader.fieldnames == ["cluster_size", "count"] assert len(rows) == 1, f"Expected 1 data row but found {len(rows)}" - assert rows[0]['cluster_size'] == '7' - assert rows[0]['count'] == '1' + assert rows[0]["cluster_size"] == "7" + assert rows[0]["count"] == "1" def test_cluster_max_containment_1(runtmp): - pairwise_csv = get_test_data('cluster.pairwise.csv') - output = runtmp.output('clusters.csv') - sizes = runtmp.output('sizes.csv') - threshold = '0.7' - - runtmp.sourmash('scripts', 'cluster', pairwise_csv, '-o', output, - '--similarity-column', "max_containment", "--cluster-sizes", - sizes, '--threshold', threshold) + pairwise_csv = get_test_data("cluster.pairwise.csv") + output = runtmp.output("clusters.csv") + sizes = runtmp.output("sizes.csv") + threshold = "0.7" + + runtmp.sourmash( + "scripts", + "cluster", + pairwise_csv, + "-o", + output, + "--similarity-column", + "max_containment", + "--cluster-sizes", + sizes, + "--threshold", + threshold, + ) assert os.path.exists(output) # check cluster output - with open(output, mode='r', newline='') as csvfile: + with open(output, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster','nodes'] + assert reader.fieldnames == ["cluster", "nodes"] assert len(rows) == 1, f"Expected 1 data row but found {len(rows)}" - assert rows[0]['cluster'] == 'Component_1' - expected = set("n2;n3;n7;n1;n6;n5;n4".split(';')) - assert set(rows[0]['nodes'].split(';')) == expected + assert rows[0]["cluster"] == "Component_1" + expected = set("n2;n3;n7;n1;n6;n5;n4".split(";")) + assert set(rows[0]["nodes"].split(";")) == expected # check cluster size histogram - with open(sizes, mode='r', newline='') as csvfile: + with open(sizes, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster_size','count'] + assert reader.fieldnames == ["cluster_size", "count"] assert len(rows) == 1, f"Expected 1 data row but found {len(rows)}" - assert rows[0]['cluster_size'] == '7' - assert rows[0]['count'] == '1' + assert rows[0]["cluster_size"] == "7" + assert rows[0]["count"] == "1" def test_cluster_max_containment_2(runtmp): - pairwise_csv = get_test_data('cluster.pairwise.csv') - output = runtmp.output('clusters.csv') - sizes = runtmp.output('sizes.csv') - threshold = '0.9' - - runtmp.sourmash('scripts', 'cluster', pairwise_csv, '-o', output, - '--similarity-column', "max_containment", "--cluster-sizes", - sizes, '--threshold', threshold) + pairwise_csv = get_test_data("cluster.pairwise.csv") + output = runtmp.output("clusters.csv") + sizes = runtmp.output("sizes.csv") + threshold = "0.9" + + runtmp.sourmash( + "scripts", + "cluster", + pairwise_csv, + "-o", + output, + "--similarity-column", + "max_containment", + "--cluster-sizes", + sizes, + "--threshold", + threshold, + ) assert os.path.exists(output) # check cluster output - with open(output, mode='r', newline='') as csvfile: + with open(output, mode="r", newline="") as csvfile: reader = 
csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster','nodes'] + assert reader.fieldnames == ["cluster", "nodes"] assert len(rows) == 2, f"Expected 2 data rows but found {len(rows)}" - assert rows[0]['cluster'] == 'Component_1' + assert rows[0]["cluster"] == "Component_1" expected_node_sets = [ - set("n1;n2;n3;n4;n5".split(';')), - set("n6;n7".split(';')), + set("n1;n2;n3;n4;n5".split(";")), + set("n6;n7".split(";")), ] for row in rows: - assert set(row['nodes'].split(';')) in expected_node_sets + assert set(row["nodes"].split(";")) in expected_node_sets # check cluster size histogram - with open(sizes, mode='r', newline='') as csvfile: + with open(sizes, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster_size','count'] + assert reader.fieldnames == ["cluster_size", "count"] assert len(rows) == 2, f"Expected 2 data rows but found {len(rows)}" rows_as_tuples = {tuple(row.values()) for row in rows} - expected = {('5', '1'), ('2', '1')} + expected = {("5", "1"), ("2", "1")} assert rows_as_tuples == expected -def test_cluster_jaccard(runtmp): - pairwise_csv = get_test_data('cluster.pairwise.csv') - output = runtmp.output('clusters.csv') - sizes = runtmp.output('sizes.csv') - threshold = '0.6' - - runtmp.sourmash('scripts', 'cluster', pairwise_csv, '-o', output, - '--similarity-column', "jaccard", "--cluster-sizes", - sizes, '--threshold', threshold) +def test_cluster_jaccard(runtmp): + pairwise_csv = get_test_data("cluster.pairwise.csv") + output = runtmp.output("clusters.csv") + sizes = runtmp.output("sizes.csv") + threshold = "0.6" + + runtmp.sourmash( + "scripts", + "cluster", + pairwise_csv, + "-o", + output, + "--similarity-column", + "jaccard", + "--cluster-sizes", + sizes, + "--threshold", + threshold, + ) assert os.path.exists(output) # check cluster output - with open(output, mode='r', newline='') as csvfile: + with open(output, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster','nodes'] + assert reader.fieldnames == ["cluster", "nodes"] assert len(rows) == 4, f"Expected 4 data rows but found {len(rows)}" - assert rows[0]['cluster'] == 'Component_1' + assert rows[0]["cluster"] == "Component_1" expected_node_sets = [ - set("n3;n4;n5;n6".split(';')), - set("n1".split(';')), - set("n2".split(';')), - set("n7".split(';')) + set("n3;n4;n5;n6".split(";")), + set("n1".split(";")), + set("n2".split(";")), + set("n7".split(";")), ] for row in rows: - assert set(row['nodes'].split(';')) in expected_node_sets + assert set(row["nodes"].split(";")) in expected_node_sets # check cluster size histogram - with open(sizes, mode='r', newline='') as csvfile: + with open(sizes, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster_size','count'] + assert reader.fieldnames == ["cluster_size", "count"] assert len(rows) == 2, f"Expected 2 data rows but found {len(rows)}" rows_as_tuples = {tuple(row.values()) for row in rows} - expected = {('1', '3'), ('4', '1')} + expected = {("1", "3"), ("4", "1")} assert rows_as_tuples == expected def test_cluster_default_similarity(runtmp): - pairwise_csv = get_test_data('cluster.pairwise.csv') - output = runtmp.output('clusters.csv') - sizes = runtmp.output('sizes.csv') - threshold = '0.9' + pairwise_csv = get_test_data("cluster.pairwise.csv") + output = 
runtmp.output("clusters.csv") + sizes = runtmp.output("sizes.csv") + threshold = "0.9" - runtmp.sourmash('scripts', 'cluster', pairwise_csv, '-o', output, - '--threshold', threshold) + runtmp.sourmash( + "scripts", "cluster", pairwise_csv, "-o", output, "--threshold", threshold + ) assert os.path.exists(output) # check cluster output - with open(output, mode='r', newline='') as csvfile: + with open(output, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster','nodes'] + assert reader.fieldnames == ["cluster", "nodes"] assert len(rows) == 2, f"Expected 2 data rows but found {len(rows)}" - assert rows[0]['cluster'] == 'Component_1' - expected_node_sets = [ - set("n1;n2;n3;n4;n5".split(';')), - set("n6;n7".split(';')) - ] + assert rows[0]["cluster"] == "Component_1" + expected_node_sets = [set("n1;n2;n3;n4;n5".split(";")), set("n6;n7".split(";"))] for row in rows: - assert set(row['nodes'].split(';')) in expected_node_sets + assert set(row["nodes"].split(";")) in expected_node_sets # check cluster size histogram assert not os.path.exists(sizes) @@ -196,270 +234,346 @@ def test_cluster_default_similarity(runtmp): def test_cluster_default_threshold(runtmp): # test default threshold (0.95) - pairwise_csv = get_test_data('cluster.pairwise.csv') - output = runtmp.output('clusters.csv') - sizes = runtmp.output('sizes.csv') + pairwise_csv = get_test_data("cluster.pairwise.csv") + output = runtmp.output("clusters.csv") + sizes = runtmp.output("sizes.csv") - runtmp.sourmash('scripts', 'cluster', pairwise_csv, '-o', output) + runtmp.sourmash("scripts", "cluster", pairwise_csv, "-o", output) assert os.path.exists(output) # check cluster output - with open(output, mode='r', newline='') as csvfile: + with open(output, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster','nodes'] + assert reader.fieldnames == ["cluster", "nodes"] assert len(rows) == 5, f"Expected 5 data rows but found {len(rows)}" - assert rows[0]['cluster'] == 'Component_1' + assert rows[0]["cluster"] == "Component_1" expected_node_sets = [ - set("n1".split(';')), - set("n2;n3;n4".split(';')), - set("n5".split(';')), - set("n6".split(';')), - set("n7".split(';')) + set("n1".split(";")), + set("n2;n3;n4".split(";")), + set("n5".split(";")), + set("n6".split(";")), + set("n7".split(";")), ] for row in rows: - assert set(row['nodes'].split(';')) in expected_node_sets + assert set(row["nodes"].split(";")) in expected_node_sets # check cluster size histogram assert not os.path.exists(sizes) def test_cluster_ani(runtmp): - pairwise_csv = get_test_data('cluster.pairwise.csv') - output = runtmp.output('clusters.csv') - sizes = runtmp.output('sizes.csv') - threshold = '0.9' - - runtmp.sourmash('scripts', 'cluster', pairwise_csv, '-o', output, - '--similarity-column', "average_containment_ani", "--cluster-sizes", - sizes, '--threshold', threshold) + pairwise_csv = get_test_data("cluster.pairwise.csv") + output = runtmp.output("clusters.csv") + sizes = runtmp.output("sizes.csv") + threshold = "0.9" + + runtmp.sourmash( + "scripts", + "cluster", + pairwise_csv, + "-o", + output, + "--similarity-column", + "average_containment_ani", + "--cluster-sizes", + sizes, + "--threshold", + threshold, + ) assert os.path.exists(output) # check cluster output - with open(output, mode='r', newline='') as csvfile: + with open(output, mode="r", newline="") as csvfile: reader = 
csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster','nodes'] + assert reader.fieldnames == ["cluster", "nodes"] assert len(rows) == 2, f"Expected 2 data rows but found {len(rows)}" - assert rows[0]['cluster'] == 'Component_1' - expected_node_sets = [ - set("n1;n2;n3;n4;n5".split(';')), - set("n6;n7".split(';')) - ] + assert rows[0]["cluster"] == "Component_1" + expected_node_sets = [set("n1;n2;n3;n4;n5".split(";")), set("n6;n7".split(";"))] for row in rows: - assert set(row['nodes'].split(';')) in expected_node_sets + assert set(row["nodes"].split(";")) in expected_node_sets # check cluster size histogram - with open(sizes, mode='r', newline='') as csvfile: + with open(sizes, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster_size','count'] + assert reader.fieldnames == ["cluster_size", "count"] assert len(rows) == 2, f"Expected 2 data rows but found {len(rows)}" rows_as_tuples = {tuple(row.values()) for row in rows} - expected = {('5', '1'), ('2', '1')} + expected = {("5", "1"), ("2", "1")} assert rows_as_tuples == expected def test_cluster_max_ani(runtmp): - pairwise_csv = get_test_data('cluster.pairwise.csv') - output = runtmp.output('clusters.csv') - sizes = runtmp.output('sizes.csv') - threshold = '0.9' - - runtmp.sourmash('scripts', 'cluster', pairwise_csv, '-o', output, - '--similarity-column', "max_containment_ani", "--cluster-sizes", - sizes, '--threshold', threshold) + pairwise_csv = get_test_data("cluster.pairwise.csv") + output = runtmp.output("clusters.csv") + sizes = runtmp.output("sizes.csv") + threshold = "0.9" + + runtmp.sourmash( + "scripts", + "cluster", + pairwise_csv, + "-o", + output, + "--similarity-column", + "max_containment_ani", + "--cluster-sizes", + sizes, + "--threshold", + threshold, + ) - assert os.path.exists(output) + assert os.path.exists(output) # check cluster output - with open(output, mode='r', newline='') as csvfile: + with open(output, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster','nodes'] + assert reader.fieldnames == ["cluster", "nodes"] assert len(rows) == 2, f"Expected 2 data rows but found {len(rows)}" - assert rows[0]['cluster'] == 'Component_1' - expected_node_sets = [set("n1;n2;n3;n4;n5".split(';')), set("n6;n7".split(';'))] + assert rows[0]["cluster"] == "Component_1" + expected_node_sets = [set("n1;n2;n3;n4;n5".split(";")), set("n6;n7".split(";"))] for row in rows: - assert set(row['nodes'].split(';')) in expected_node_sets + assert set(row["nodes"].split(";")) in expected_node_sets # check cluster size histogram - with open(sizes, mode='r', newline='') as csvfile: + with open(sizes, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster_size','count'] + assert reader.fieldnames == ["cluster_size", "count"] assert len(rows) == 2, f"Expected 2 data rows but found {len(rows)}" rows_as_tuples = {tuple(row.values()) for row in rows} - expected = {('5', '1'), ('2', '1')} + expected = {("5", "1"), ("2", "1")} assert rows_as_tuples == expected def test_cluster_ani_pairwise(runtmp): - pairwise_csv = runtmp.output('pairwise.csv') - output = runtmp.output('clusters.csv') - sizes = runtmp.output('sizes.csv') - cluster_threshold = '0.90' + pairwise_csv = runtmp.output("pairwise.csv") + output = runtmp.output("clusters.csv") + sizes = 
runtmp.output("sizes.csv") + cluster_threshold = "0.90" - query_list = runtmp.output('query.txt') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + query_list = runtmp.output("query.txt") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', pairwise_csv, "-t", "-0.1", "--ani") + runtmp.sourmash( + "scripts", "pairwise", query_list, "-o", pairwise_csv, "-t", "-0.1", "--ani" + ) assert os.path.exists(pairwise_csv) - runtmp.sourmash('scripts', 'cluster', pairwise_csv, '-o', output, - '--similarity-column', "average_containment_ani", "--cluster-sizes", - sizes, '--threshold', cluster_threshold) + runtmp.sourmash( + "scripts", + "cluster", + pairwise_csv, + "-o", + output, + "--similarity-column", + "average_containment_ani", + "--cluster-sizes", + sizes, + "--threshold", + cluster_threshold, + ) assert os.path.exists(output) # check cluster output - with open(output, mode='r', newline='') as csvfile: + with open(output, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster','nodes'] + assert reader.fieldnames == ["cluster", "nodes"] print(rows) assert len(rows) == 2, f"Expected 2 data rows but found {len(rows)}" - assert rows[0]['cluster'] == 'Component_1' - expected_node_sets = [set("NC_009661.1;NC_011665.1".split(';')), set("CP001071.1".split(';'))] + assert rows[0]["cluster"] == "Component_1" + expected_node_sets = [ + set("NC_009661.1;NC_011665.1".split(";")), + set("CP001071.1".split(";")), + ] for row in rows: - assert set(row['nodes'].split(';')) in expected_node_sets + assert set(row["nodes"].split(";")) in expected_node_sets # check cluster size histogram - with open(sizes, mode='r', newline='') as csvfile: + with open(sizes, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster_size','count'] + assert reader.fieldnames == ["cluster_size", "count"] assert len(rows) == 2, f"Expected 2 data rows but found {len(rows)}" rows_as_tuples = {tuple(row.values()) for row in rows} - expected = {('1', '1'), ('2', '1')} + expected = {("1", "1"), ("2", "1")} assert rows_as_tuples == expected def test_cluster_avg_ani_no_ani(runtmp, capfd): - pairwise_csv = runtmp.output('pairwise.csv') - output = runtmp.output('clusters.csv') - sizes = runtmp.output('sizes.csv') - cluster_threshold = '0.9' + pairwise_csv = runtmp.output("pairwise.csv") + output = runtmp.output("clusters.csv") + sizes = runtmp.output("sizes.csv") + cluster_threshold = "0.9" - query_list = runtmp.output('query.txt') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + query_list = runtmp.output("query.txt") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', pairwise_csv, "-t", "-0.1") # do not pass `--ani` + runtmp.sourmash( + "scripts", "pairwise", query_list, "-o", pairwise_csv, "-t", "-0.1" + ) # do not pass `--ani` assert os.path.exists(pairwise_csv) with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'cluster', pairwise_csv, '-o', output, - 
'--similarity-column', "average_containment_ani", "--cluster-sizes", - sizes, '--threshold', cluster_threshold) + runtmp.sourmash( + "scripts", + "cluster", + pairwise_csv, + "-o", + output, + "--similarity-column", + "average_containment_ani", + "--cluster-sizes", + sizes, + "--threshold", + cluster_threshold, + ) print(runtmp.last_result.err) captured = capfd.readouterr() print(captured.err) - assert 'average_containment_ani is None. Did you estimate ANI?' in captured.err + assert "average_containment_ani is None. Did you estimate ANI?" in captured.err def test_cluster_max_ani_no_ani(runtmp, capfd): - pairwise_csv = runtmp.output('pairwise.csv') - output = runtmp.output('clusters.csv') - sizes = runtmp.output('sizes.csv') - cluster_threshold = '0.9' + pairwise_csv = runtmp.output("pairwise.csv") + output = runtmp.output("clusters.csv") + sizes = runtmp.output("sizes.csv") + cluster_threshold = "0.9" - query_list = runtmp.output('query.txt') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + query_list = runtmp.output("query.txt") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', pairwise_csv, "-t", "-0.1") # do not pass `--ani` + runtmp.sourmash( + "scripts", "pairwise", query_list, "-o", pairwise_csv, "-t", "-0.1" + ) # do not pass `--ani` assert os.path.exists(pairwise_csv) with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'cluster', pairwise_csv, '-o', output, - '--similarity-column', "max_containment_ani", "--cluster-sizes", - sizes, '--threshold', cluster_threshold) + runtmp.sourmash( + "scripts", + "cluster", + pairwise_csv, + "-o", + output, + "--similarity-column", + "max_containment_ani", + "--cluster-sizes", + sizes, + "--threshold", + cluster_threshold, + ) print(runtmp.last_result.err) captured = capfd.readouterr() print(captured.err) - assert 'max_containment_ani is None. Did you estimate ANI?' in captured.err + assert "max_containment_ani is None. Did you estimate ANI?" 
in captured.err def test_cluster_ani_multisearch(runtmp): - multisearch_csv = runtmp.output('multisearch.csv') - output = runtmp.output('clusters.csv') - sizes = runtmp.output('sizes.csv') - cluster_threshold = '0.90' + multisearch_csv = runtmp.output("multisearch.csv") + output = runtmp.output("clusters.csv") + sizes = runtmp.output("sizes.csv") + cluster_threshold = "0.90" - query_list = runtmp.output('query.txt') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + query_list = runtmp.output("query.txt") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) - runtmp.sourmash('scripts', 'multisearch', query_list, query_list, - '-o', multisearch_csv, "-t", "-0.1", "--ani") + runtmp.sourmash( + "scripts", + "multisearch", + query_list, + query_list, + "-o", + multisearch_csv, + "-t", + "-0.1", + "--ani", + ) assert os.path.exists(multisearch_csv) - runtmp.sourmash('scripts', 'cluster', multisearch_csv, '-o', output, - '--similarity-column', "average_containment_ani", "--cluster-sizes", - sizes, '--threshold', cluster_threshold) + runtmp.sourmash( + "scripts", + "cluster", + multisearch_csv, + "-o", + output, + "--similarity-column", + "average_containment_ani", + "--cluster-sizes", + sizes, + "--threshold", + cluster_threshold, + ) assert os.path.exists(output) # check cluster output - with open(output, mode='r', newline='') as csvfile: + with open(output, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster','nodes'] + assert reader.fieldnames == ["cluster", "nodes"] print(rows) assert len(rows) == 2, f"Expected 2 data rows but found {len(rows)}" - assert rows[0]['cluster'] == 'Component_1' - expected_node_sets = [set("NC_009661.1;NC_011665.1".split(';')), set("CP001071.1".split(';'))] + assert rows[0]["cluster"] == "Component_1" + expected_node_sets = [ + set("NC_009661.1;NC_011665.1".split(";")), + set("CP001071.1".split(";")), + ] for row in rows: - assert set(row['nodes'].split(';')) in expected_node_sets + assert set(row["nodes"].split(";")) in expected_node_sets # check cluster size histogram - with open(sizes, mode='r', newline='') as csvfile: + with open(sizes, mode="r", newline="") as csvfile: reader = csv.DictReader(csvfile) rows = [row for row in reader] - assert reader.fieldnames == ['cluster_size','count'] + assert reader.fieldnames == ["cluster_size", "count"] assert len(rows) == 2, f"Expected 2 data rows but found {len(rows)}" rows_as_tuples = {tuple(row.values()) for row in rows} - expected = {('1', '1'), ('2', '1')} + expected = {("1", "1"), ("2", "1")} assert rows_as_tuples == expected def test_empty_file(runtmp, capfd): # test with an empty query list - csv = runtmp.output('empty.csv') + csv = runtmp.output("empty.csv") make_file_list(csv, []) - output = runtmp.output('out.csv') - out2 = runtmp.output('counts.csv') + output = runtmp.output("out.csv") + out2 = runtmp.output("counts.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'cluster', csv, - '-o', output, '--cluster-sizes', out2) + runtmp.sourmash( + "scripts", "cluster", csv, "-o", output, "--cluster-sizes", out2 + ) print(runtmp.last_result.err) captured = capfd.readouterr() @@ -470,18 +584,19 @@ def test_empty_file(runtmp, capfd): def test_bad_file(runtmp, capfd): # test with an empty query list - csv = 
runtmp.output('bad.csv') - with open(csv, 'w') as out: - out.write('column1,column2') + csv = runtmp.output("bad.csv") + with open(csv, "w") as out: + out.write("column1,column2") make_file_list(csv, []) - output = runtmp.output('out.csv') - out2 = runtmp.output('counts.csv') + output = runtmp.output("out.csv") + out2 = runtmp.output("counts.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'cluster', csv, - '-o', output, '--cluster-sizes', out2) + runtmp.sourmash( + "scripts", "cluster", csv, "-o", output, "--cluster-sizes", out2 + ) print(runtmp.last_result.err) captured = capfd.readouterr() diff --git a/src/python/tests/test_fastgather.py b/src/python/tests/test_fastgather.py index f444818f..99cacf82 100644 --- a/src/python/tests/test_fastgather.py +++ b/src/python/tests/test_fastgather.py @@ -4,44 +4,54 @@ import sourmash from . import sourmash_tst_utils as utils -from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist, - index_siglist) +from .sourmash_tst_utils import ( + get_test_data, + make_file_list, + zip_siglist, + index_siglist, +) def test_installed(runtmp): with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'fastgather') + runtmp.sourmash("scripts", "fastgather") - assert 'usage: fastgather' in runtmp.last_result.err + assert "usage: fastgather" in runtmp.last_result.err -def test_simple(runtmp, capfd, indexed_query, indexed_against, zip_against, toggle_internal_storage): +def test_simple( + runtmp, capfd, indexed_query, indexed_against, zip_against, toggle_internal_storage +): # test basic execution! - query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, sig47, sig63]) if indexed_query: - query = index_siglist(runtmp, query, runtmp.output('query'), - scaled=100000) + query = index_siglist(runtmp, query, runtmp.output("query"), scaled=100000) if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) if indexed_against: - against_list = index_siglist(runtmp, against_list, runtmp.output('db'), - toggle_internal_storage=toggle_internal_storage) - - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') - - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '-s', '100000') + against_list = index_siglist( + runtmp, + against_list, + runtmp.output("db"), + toggle_internal_storage=toggle_internal_storage, + ) + + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") + + runtmp.sourmash( + "scripts", "fastgather", query, against_list, "-o", g_output, "-s", "100000" + ) assert os.path.exists(g_output) captured = capfd.readouterr() @@ -50,178 +60,278 @@ def test_simple(runtmp, capfd, indexed_query, indexed_against, zip_against, togg df = pandas.read_csv(g_output) assert len(df) == 3 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + 
"match_md5", + "gather_result_rank", + "intersect_bp", + }.issubset(keys) # CTB note: we do not need to worry about this warning for query from a # RocksDB, since there is only one. if indexed_against: - print('indexed against:', indexed_against) - assert "WARNING: loading all sketches from a RocksDB into memory!" in captured.err + print("indexed against:", indexed_against) + assert ( + "WARNING: loading all sketches from a RocksDB into memory!" in captured.err + ) def test_simple_with_prefetch(runtmp, zip_against, indexed, toggle_internal_storage): # test basic execution! - query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, sig47, sig63]) if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db'), - toggle_internal_storage=toggle_internal_storage) - - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') - - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + against_list = index_siglist( + runtmp, + against_list, + runtmp.output("db"), + toggle_internal_storage=toggle_internal_storage, + ) + + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") + + runtmp.sourmash( + "scripts", + "fastgather", + query, + against_list, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) assert os.path.exists(g_output) assert os.path.exists(p_output) df = pandas.read_csv(g_output) assert len(df) == 3 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "gather_result_rank", + "intersect_bp", + }.issubset(keys) df = pandas.read_csv(p_output) assert len(df) == 3 keys = set(df.keys()) - assert keys == {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'} + assert keys == { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + } def test_simple_with_prefetch_list_of_zips(runtmp): # test basic execution! 
- query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.sig.zip') - sig47 = get_test_data('47.sig.zip') - sig63 = get_test_data('63.sig.zip') + sig2 = get_test_data("2.sig.zip") + sig47 = get_test_data("47.sig.zip") + sig63 = get_test_data("63.sig.zip") make_file_list(against_list, [sig2, sig47, sig63]) - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') - - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") + + runtmp.sourmash( + "scripts", + "fastgather", + query, + against_list, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) assert os.path.exists(g_output) assert os.path.exists(p_output) df = pandas.read_csv(g_output) assert len(df) == 3 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "gather_result_rank", + "intersect_bp", + }.issubset(keys) df = pandas.read_csv(p_output) assert len(df) == 3 keys = set(df.keys()) - assert keys == {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'} + assert keys == { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + } def test_missing_query(runtmp, capfd, zip_against): # test missing query - query = runtmp.output('no-such-file') - against_list = runtmp.output('against.txt') + query = runtmp.output("no-such-file") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, sig47, sig63]) if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + runtmp.sourmash( + "scripts", + "fastgather", + query, + against_list, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) captured = capfd.readouterr() print(captured.err) - assert 'Error: No such file or directory' in captured.err + assert "Error: No such file or directory" in captured.err def test_bad_query(runtmp, capfd, zip_against): # test non-sig query - query = runtmp.output('no-such-file') - against_list = runtmp.output('against.txt') + query = runtmp.output("no-such-file") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") # query doesn't need to be a sig anymore - sig, zip, or 
pathlist welcome # as long as there's only one sketch that matches params - make_file_list(query, [sig2,sig47]) + make_file_list(query, [sig2, sig47]) make_file_list(against_list, [sig2, sig47, sig63]) if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + runtmp.sourmash( + "scripts", + "fastgather", + query, + against_list, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) captured = capfd.readouterr() print(captured.err) - assert 'Error: Fastgather requires a single query sketch. Check input:' in captured.err + assert ( + "Error: Fastgather requires a single query sketch. Check input:" in captured.err + ) def test_missing_against(runtmp, capfd, zip_against): # test missing against - query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") # don't make against list if zip_against: - against_list = runtmp.output('against.zip') + against_list = runtmp.output("against.zip") - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + runtmp.sourmash( + "scripts", + "fastgather", + query, + against_list, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) captured = capfd.readouterr() print(captured.err) - assert 'Error: No such file or directory' in captured.err + assert "Error: No such file or directory" in captured.err def test_sig_against(runtmp, capfd): - # sig file is ok as against file now - query = get_test_data('SRR606249.sig.gz') - - sig2 = get_test_data('2.fa.sig.gz') - - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') - - runtmp.sourmash('scripts', 'fastgather', query, sig2, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + # sig file is ok as against file now + query = get_test_data("SRR606249.sig.gz") + + sig2 = get_test_data("2.fa.sig.gz") + + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") + + runtmp.sourmash( + "scripts", + "fastgather", + query, + sig2, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) captured = capfd.readouterr() print(captured.err) @@ -231,108 +341,158 @@ def test_sig_against(runtmp, capfd): df = pandas.read_csv(g_output) assert len(df) == 1 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "gather_result_rank", + "intersect_bp", + }.issubset(keys) def test_bad_against(runtmp, capfd): # test bad 'against' file - in this case, one containing a bad filename. 
- query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') - - sig2 = get_test_data('2.fa.sig.gz') - make_file_list(against_list, [sig2, 'no-exist']) - - - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') - - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") + + sig2 = get_test_data("2.fa.sig.gz") + make_file_list(against_list, [sig2, "no-exist"]) + + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") + + runtmp.sourmash( + "scripts", + "fastgather", + query, + against_list, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) captured = capfd.readouterr() print(captured.err) assert "WARNING: could not load sketches from path 'no-exist'" in captured.err - assert "WARNING: 1 search paths failed to load. See error messages above." in captured.err + assert ( + "WARNING: 1 search paths failed to load. See error messages above." + in captured.err + ) def test_bad_against_2(runtmp, capfd): # test bad 'against' file - in this case, one containing an empty file - query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - empty_file = runtmp.output('empty.sig') - with open(empty_file, 'wb') as fp: + sig2 = get_test_data("2.fa.sig.gz") + empty_file = runtmp.output("empty.sig") + with open(empty_file, "wb") as fp: pass make_file_list(against_list, [sig2, empty_file]) - - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') - - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") + + runtmp.sourmash( + "scripts", + "fastgather", + query, + against_list, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) captured = capfd.readouterr() print(captured.err) - assert "Sketch loading error: File is too short, less than five bytes" in captured.err + assert ( + "Sketch loading error: File is too short, less than five bytes" in captured.err + ) assert "WARNING: could not load sketches from path" in captured.err - assert "WARNING: 1 search paths failed to load. See error messages above." in captured.err + assert ( + "WARNING: 1 search paths failed to load. See error messages above." 
+ in captured.err + ) def test_bad_against_3(runtmp, capfd): # test with a bad against (a .sig.gz file renamed as zip file) - query = get_test_data('SRR606249.sig.gz') + query = get_test_data("SRR606249.sig.gz") - sig2 = get_test_data('2.fa.sig.gz') - against_zip = runtmp.output('against.zip') + sig2 = get_test_data("2.fa.sig.gz") + against_zip = runtmp.output("against.zip") # cp sig2 into against_zip - with open(against_zip, 'wb') as fp: - with open(sig2, 'rb') as fp2: + with open(against_zip, "wb") as fp: + with open(sig2, "rb") as fp2: fp.write(fp2.read()) - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'fastgather', query, against_zip, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + runtmp.sourmash( + "scripts", + "fastgather", + query, + against_zip, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) captured = capfd.readouterr() print(captured.err) - assert 'InvalidArchive' in captured.err + assert "InvalidArchive" in captured.err @pytest.mark.xfail(reason="should work, bug") def test_against_multisigfile(runtmp, zip_against): # test against a sigfile that contains multiple sketches - query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - combined = runtmp.output('combined.sig.gz') - runtmp.sourmash('sig', 'cat', sig2, sig47, sig63, '-o', combined) + combined = runtmp.output("combined.sig.gz") + runtmp.sourmash("sig", "cat", sig2, sig47, sig63, "-o", combined) make_file_list(against_list, [combined]) if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) - - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') - - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) + + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") + + runtmp.sourmash( + "scripts", + "fastgather", + query, + against_list, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) df = pandas.read_csv(g_output) assert len(df) == 3 print(df) @@ -340,82 +500,111 @@ def test_against_multisigfile(runtmp, zip_against): def test_query_multisigfile(runtmp, capfd, zip_against): # test with a sigfile that contains multiple sketches - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - combined = runtmp.output('combined.sig.gz') - runtmp.sourmash('sig', 'cat', sig2, sig47, sig63, '-o', combined) + combined = runtmp.output("combined.sig.gz") + runtmp.sourmash("sig", "cat", sig2, sig47, sig63, "-o", combined) make_file_list(against_list, [sig2, sig47, sig63]) if zip_against: - 
against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'fastgather', combined, against_list, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + runtmp.sourmash( + "scripts", + "fastgather", + combined, + against_list, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) # this fails now :) captured = capfd.readouterr() print(captured.err) - assert "Error: Fastgather requires a single query sketch. Check input:" in captured.err + assert ( + "Error: Fastgather requires a single query sketch. Check input:" in captured.err + ) def test_against_nomatch(runtmp, capfd, zip_against): # test with 'against' file containing a non-matching ksize - query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") - sig1 = get_test_data('1.fa.k21.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig1 = get_test_data("1.fa.k21.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, sig1, sig47, sig63]) if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) - - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') - - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) + + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") + + runtmp.sourmash( + "scripts", + "fastgather", + query, + against_list, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) captured = capfd.readouterr() print(captured.err) - assert 'WARNING: skipped 1 search paths - no compatible signatures.' in captured.err + assert "WARNING: skipped 1 search paths - no compatible signatures." 
in captured.err def test_md5s(runtmp, zip_against): # check that the correct md5sums (of the original sketches) are in # the output files - query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, sig47, sig63]) if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) - - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') - - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) + + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") + + runtmp.sourmash( + "scripts", + "fastgather", + query, + against_list, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) assert os.path.exists(g_output) assert os.path.exists(p_output) @@ -423,9 +612,17 @@ def test_md5s(runtmp, zip_against): df = pandas.read_csv(g_output) assert len(df) == 3 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) - - md5s = list(df['match_md5']) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "gather_result_rank", + "intersect_bp", + }.issubset(keys) + + md5s = list(df["match_md5"]) print(md5s) for against_file in (sig2, sig47, sig63): @@ -438,9 +635,16 @@ def test_md5s(runtmp, zip_against): keys = set(df.keys()) # prefetch output has no rank. 
- assert keys == {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'} - - md5s = list(df['match_md5']) + assert keys == { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + } + + md5s = list(df["match_md5"]) print(md5s) for against_file in (sig2, sig47, sig63): @@ -450,79 +654,133 @@ def test_md5s(runtmp, zip_against): def test_csv_columns_vs_sourmash_prefetch(runtmp, zip_against): # the column names should be strict subsets of sourmash prefetch cols - query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, sig47, sig63]) if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") # first run fastgather - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + runtmp.sourmash( + "scripts", + "fastgather", + query, + against_list, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) assert os.path.exists(g_output) assert os.path.exists(p_output) # now run sourmash prefetch - sp_output = runtmp.output('sourmash-prefetch.csv') - runtmp.sourmash('prefetch', query, against_list, - '-o', sp_output, '--scaled', '100000') + sp_output = runtmp.output("sourmash-prefetch.csv") + runtmp.sourmash( + "prefetch", query, against_list, "-o", sp_output, "--scaled", "100000" + ) gather_df = pandas.read_csv(g_output) g_keys = set(gather_df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(g_keys) - g_keys.remove('gather_result_rank') # 'gather_result_rank' is not in sourmash prefetch! + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "gather_result_rank", + "intersect_bp", + }.issubset(g_keys) + g_keys.remove( + "gather_result_rank" + ) # 'gather_result_rank' is not in sourmash prefetch! 
sourmash_prefetch_df = pandas.read_csv(sp_output) sp_keys = set(sourmash_prefetch_df.keys()) print(g_keys - sp_keys) diff_keys = g_keys - sp_keys - assert diff_keys == set(['unique_intersect_bp', 'median_abund', 'f_match_orig', 'std_abund', 'average_abund', 'f_unique_to_query', 'remaining_bp', 'f_unique_weighted', 'sum_weighted_found', 'total_weighted_hashes', 'n_unique_weighted_found', 'f_orig_query', 'f_match']) + assert diff_keys == set( + [ + "unique_intersect_bp", + "median_abund", + "f_match_orig", + "std_abund", + "average_abund", + "f_unique_to_query", + "remaining_bp", + "f_unique_weighted", + "sum_weighted_found", + "total_weighted_hashes", + "n_unique_weighted_found", + "f_orig_query", + "f_match", + ] + ) def test_fastgather_gatherout_as_picklist(runtmp, zip_against): # should be able to use fastgather gather output as picklist - query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, sig47, sig63]) if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") # first run fastgather - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + runtmp.sourmash( + "scripts", + "fastgather", + query, + against_list, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) assert os.path.exists(g_output) assert os.path.exists(p_output) # now run sourmash gather using as picklist as picklist - gather_picklist_output = runtmp.output('sourmash-gather+picklist.csv') - runtmp.sourmash('gather', query, against_list, - '-o', gather_picklist_output, '--scaled', '100000', - '--picklist', f'{g_output}:match_name:ident') + gather_picklist_output = runtmp.output("sourmash-gather+picklist.csv") + runtmp.sourmash( + "gather", + query, + against_list, + "-o", + gather_picklist_output, + "--scaled", + "100000", + "--picklist", + f"{g_output}:match_name:ident", + ) # finally, run sourmash gather using fastgather gather output as picklist - full_gather_output = runtmp.output('sourmash-gather.csv') - runtmp.sourmash('gather', query, against_list, - '-o', full_gather_output, '--scaled', '100000') + full_gather_output = runtmp.output("sourmash-gather.csv") + runtmp.sourmash( + "gather", query, against_list, "-o", full_gather_output, "--scaled", "100000" + ) picklist_df = pandas.read_csv(gather_picklist_output) full_df = pandas.read_csv(full_gather_output) @@ -532,38 +790,56 @@ def test_fastgather_gatherout_as_picklist(runtmp, zip_against): def test_fastgather_prefetchout_as_picklist(runtmp, zip_against): # should be able to use fastgather prefetch output as picklist - query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = 
get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, sig47, sig63]) if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") # first run fastgather - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + runtmp.sourmash( + "scripts", + "fastgather", + query, + against_list, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) assert os.path.exists(g_output) assert os.path.exists(p_output) # now run sourmash gather using fastgather prefetch output as picklist - gather_picklist_output = runtmp.output('sourmash-gather+picklist.csv') - runtmp.sourmash('gather', query, against_list, - '-o', gather_picklist_output, '--scaled', '100000', - '--picklist', f'{p_output}:match_name:ident') + gather_picklist_output = runtmp.output("sourmash-gather+picklist.csv") + runtmp.sourmash( + "gather", + query, + against_list, + "-o", + gather_picklist_output, + "--scaled", + "100000", + "--picklist", + f"{p_output}:match_name:ident", + ) # finally, run sourmash gather using as picklist as picklist - full_gather_output = runtmp.output('sourmash-gather.csv') - runtmp.sourmash('gather', query, against_list, - '-o', full_gather_output, '--scaled', '100000') + full_gather_output = runtmp.output("sourmash-gather.csv") + runtmp.sourmash( + "gather", query, against_list, "-o", full_gather_output, "--scaled", "100000" + ) picklist_df = pandas.read_csv(gather_picklist_output) full_df = pandas.read_csv(full_gather_output) @@ -573,106 +849,188 @@ def test_fastgather_prefetchout_as_picklist(runtmp, zip_against): def test_simple_protein(runtmp): # test basic protein execution - sigs = get_test_data('protein.zip') + sigs = get_test_data("protein.zip") - query = runtmp.output('query.zip') - against = runtmp.output('against.zip') + query = runtmp.output("query.zip") + against = runtmp.output("against.zip") # extract query from zip file - runtmp.sourmash('sig', 'extract', sigs, '--name', 'GCA_001593935', '-o', query) + runtmp.sourmash("sig", "extract", sigs, "--name", "GCA_001593935", "-o", query) # extract against from zip file - runtmp.sourmash('sig', 'extract', sigs, '--name', 'GCA_001593925', '-o', against) - - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') - - runtmp.sourmash('scripts', 'fastgather', query, against, - '-o', g_output, '-s', '100', '--moltype', 'protein', '-k', '19', - '--threshold', '0') + runtmp.sourmash("sig", "extract", sigs, "--name", "GCA_001593925", "-o", against) + + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") + + runtmp.sourmash( + "scripts", + "fastgather", + query, + against, + "-o", + g_output, + "-s", + "100", + "--moltype", + "protein", + "-k", + "19", + "--threshold", + "0", + ) assert os.path.exists(g_output) df = pandas.read_csv(g_output) assert len(df) == 1 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + 
"gather_result_rank", + "intersect_bp", + }.issubset(keys) print(df) - assert df['match_md5'][0] == "16869d2c8a1d29d1c8e56f5c561e585e" + assert df["match_md5"][0] == "16869d2c8a1d29d1c8e56f5c561e585e" def test_simple_dayhoff(runtmp): # test basic protein execution - sigs = get_test_data('dayhoff.zip') + sigs = get_test_data("dayhoff.zip") - query = runtmp.output('query.zip') - against = runtmp.output('against.zip') + query = runtmp.output("query.zip") + against = runtmp.output("against.zip") # extract query from zip file - runtmp.sourmash('sig', 'extract', sigs, '--name', 'GCA_001593935', '-o', query) + runtmp.sourmash("sig", "extract", sigs, "--name", "GCA_001593935", "-o", query) # extract against from zip file - runtmp.sourmash('sig', 'extract', sigs, '--name', 'GCA_001593925', '-o', against) - - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') - - runtmp.sourmash('scripts', 'fastgather', query, against, - '-o', g_output, '-s', '100', '--moltype', 'dayhoff', '-k', '19', - '--threshold', '0') + runtmp.sourmash("sig", "extract", sigs, "--name", "GCA_001593925", "-o", against) + + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") + + runtmp.sourmash( + "scripts", + "fastgather", + query, + against, + "-o", + g_output, + "-s", + "100", + "--moltype", + "dayhoff", + "-k", + "19", + "--threshold", + "0", + ) assert os.path.exists(g_output) df = pandas.read_csv(g_output) assert len(df) == 1 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "gather_result_rank", + "intersect_bp", + }.issubset(keys) print(df) - assert df['match_md5'][0] == "fbca5e5211e4d58427997fd5c8343e9a" + assert df["match_md5"][0] == "fbca5e5211e4d58427997fd5c8343e9a" def test_simple_hp(runtmp): # test basic protein execution - sigs = get_test_data('hp.zip') + sigs = get_test_data("hp.zip") - query = runtmp.output('query.zip') - against = runtmp.output('against.zip') + query = runtmp.output("query.zip") + against = runtmp.output("against.zip") # extract query from zip file - runtmp.sourmash('sig', 'extract', sigs, '--name', 'GCA_001593935', '-o', query) + runtmp.sourmash("sig", "extract", sigs, "--name", "GCA_001593935", "-o", query) # extract against from zip file - runtmp.sourmash('sig', 'extract', sigs, '--name', 'GCA_001593925', '-o', against) - - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') - - runtmp.sourmash('scripts', 'fastgather', query, against, - '-o', g_output, '-s', '100', '--moltype', 'hp', '-k', '19', - '--threshold', '0') + runtmp.sourmash("sig", "extract", sigs, "--name", "GCA_001593925", "-o", against) + + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") + + runtmp.sourmash( + "scripts", + "fastgather", + query, + against, + "-o", + g_output, + "-s", + "100", + "--moltype", + "hp", + "-k", + "19", + "--threshold", + "0", + ) assert os.path.exists(g_output) df = pandas.read_csv(g_output) assert len(df) == 1 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "gather_result_rank", + "intersect_bp", + }.issubset(keys) print(df) - assert df['match_md5'][0] == 
"ea2a1ad233c2908529d124a330bcb672" + assert df["match_md5"][0] == "ea2a1ad233c2908529d124a330bcb672" def test_indexed_against(runtmp, capfd): # accept rocksdb against, but with a warning - query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") make_file_list(against_list, [sig2]) - db_against = runtmp.output('against.rocksdb') + db_against = runtmp.output("against.rocksdb") ## index against - runtmp.sourmash('scripts', 'index', against_list, - '-o', db_against, '-k', str(31), '--scaled', str(1000), - '--moltype', "DNA") - - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') - - runtmp.sourmash('scripts', 'fastgather', query, db_against, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + runtmp.sourmash( + "scripts", + "index", + against_list, + "-o", + db_against, + "-k", + str(31), + "--scaled", + str(1000), + "--moltype", + "DNA", + ) + + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") + + runtmp.sourmash( + "scripts", + "fastgather", + query, + db_against, + "-o", + g_output, + "--output-prefetch", + p_output, + "-s", + "100000", + ) df = pandas.read_csv(g_output) assert len(df) == 1 @@ -685,12 +1043,12 @@ def test_indexed_against(runtmp, capfd): def test_simple_with_manifest_loading(runtmp): # test basic execution! - query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, sig47, sig63]) query_manifest = runtmp.output("query-manifest.csv") @@ -699,35 +1057,52 @@ def test_simple_with_manifest_loading(runtmp): runtmp.sourmash("sig", "manifest", query, "-o", query_manifest) runtmp.sourmash("sig", "manifest", against_list, "-o", against_manifest) - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') - - runtmp.sourmash('scripts', 'fastgather', query_manifest, against_manifest, - '-o', g_output, '-s', '100000') + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") + + runtmp.sourmash( + "scripts", + "fastgather", + query_manifest, + against_manifest, + "-o", + g_output, + "-s", + "100000", + ) assert os.path.exists(g_output) df = pandas.read_csv(g_output) assert len(df) == 3 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "gather_result_rank", + "intersect_bp", + }.issubset(keys) def test_simple_full_output(runtmp): # test basic execution! 
- query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, sig47, sig63]) - g_output = runtmp.output('gather.csv') - p_output = runtmp.output('prefetch.csv') + g_output = runtmp.output("gather.csv") + p_output = runtmp.output("prefetch.csv") - runtmp.sourmash('scripts', 'fastgather', query, against_list, - '-o', g_output, '-s', '100000') + runtmp.sourmash( + "scripts", "fastgather", query, against_list, "-o", g_output, "-s", "100000" + ) assert os.path.exists(g_output) df = pandas.read_csv(g_output) @@ -735,27 +1110,61 @@ def test_simple_full_output(runtmp): keys = set(df.keys()) print(keys) print(df) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) - expected_keys = {'match_name', 'query_filename', 'query_n_hashes', 'match_filename', 'f_match_orig', - 'query_bp', 'query_abundance', 'match_containment_ani', 'intersect_bp', 'total_weighted_hashes', - 'n_unique_weighted_found', 'query_name', 'gather_result_rank', 'moltype', - 'query_containment_ani', 'sum_weighted_found', 'f_orig_query', 'ksize', 'max_containment_ani', - 'std_abund', 'scaled', 'average_containment_ani', 'f_match', 'f_unique_to_query', - 'average_abund', 'unique_intersect_bp', 'median_abund', 'query_md5', 'match_md5', 'remaining_bp', - 'f_unique_weighted'} + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "gather_result_rank", + "intersect_bp", + }.issubset(keys) + expected_keys = { + "match_name", + "query_filename", + "query_n_hashes", + "match_filename", + "f_match_orig", + "query_bp", + "query_abundance", + "match_containment_ani", + "intersect_bp", + "total_weighted_hashes", + "n_unique_weighted_found", + "query_name", + "gather_result_rank", + "moltype", + "query_containment_ani", + "sum_weighted_found", + "f_orig_query", + "ksize", + "max_containment_ani", + "std_abund", + "scaled", + "average_containment_ani", + "f_match", + "f_unique_to_query", + "average_abund", + "unique_intersect_bp", + "median_abund", + "query_md5", + "match_md5", + "remaining_bp", + "f_unique_weighted", + } assert keys == expected_keys - md5s = set(df['match_md5']) + md5s = set(df["match_md5"]) for against_file in (sig2, sig47, sig63): for ss in sourmash.load_file_as_signatures(against_file, ksize=31): assert ss.md5sum() in md5s - intersect_bp = set(df['intersect_bp']) + intersect_bp = set(df["intersect_bp"]) assert intersect_bp == set([4400000, 4100000, 2200000]) - f_unique_to_query = set([round(x,4) for x in df['f_unique_to_query']]) + f_unique_to_query = set([round(x, 4) for x in df["f_unique_to_query"]]) assert f_unique_to_query == set([0.0052, 0.0105, 0.0043]) - query_containment_ani = set([round(x,4) for x in df['query_containment_ani']]) - assert query_containment_ani == { 0.8442, 0.8613, 0.8632 } + query_containment_ani = set([round(x, 4) for x in df["query_containment_ani"]]) + assert query_containment_ani == {0.8442, 0.8613, 0.8632} print(query_containment_ani) for index, row in df.iterrows(): print(row.to_dict()) @@ -763,29 +1172,39 @@ def test_simple_full_output(runtmp): def test_fullres_vs_sourmash_gather(runtmp): # 
fastgather results should match to sourmash gather results - query = get_test_data('SRR606249.sig.gz') + query = get_test_data("SRR606249.sig.gz") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") make_file_list(query_list, [query]) - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") make_file_list(against_list, [sig2, sig47, sig63]) - g_output = runtmp.output('SRR606249.gather.csv') - runtmp.sourmash('scripts', 'fastgather', query_list, - against_list, '-s', '100000', '-t', '0', - '-o', g_output) + g_output = runtmp.output("SRR606249.gather.csv") + runtmp.sourmash( + "scripts", + "fastgather", + query_list, + against_list, + "-s", + "100000", + "-t", + "0", + "-o", + g_output, + ) print(runtmp.last_result.out) print(runtmp.last_result.err) assert os.path.exists(g_output) # now run sourmash gather - sg_output = runtmp.output('.csv') - runtmp.sourmash('gather', query, against_list, - '-o', sg_output, '--scaled', '100000') + sg_output = runtmp.output(".csv") + runtmp.sourmash( + "gather", query, against_list, "-o", sg_output, "--scaled", "100000" + ) gather_df = pandas.read_csv(g_output) g_keys = set(gather_df.keys()) @@ -794,88 +1213,115 @@ def test_fullres_vs_sourmash_gather(runtmp): sg_keys = set(sourmash_gather_df.keys()) print(sg_keys) modified_keys = ["match_md5", "match_name", "match_filename"] - sg_keys.update(modified_keys) # fastgather is more explicit (match_md5 instead of md5, etc) - print('g_keys - sg_keys:', g_keys - sg_keys) + sg_keys.update( + modified_keys + ) # fastgather is more explicit (match_md5 instead of md5, etc) + print("g_keys - sg_keys:", g_keys - sg_keys) assert not g_keys - sg_keys, g_keys - sg_keys for _idx, row in sourmash_gather_df.iterrows(): print(row.to_dict()) - fg_intersect_bp = set(gather_df['intersect_bp']) - g_intersect_bp = set(sourmash_gather_df['intersect_bp']) + fg_intersect_bp = set(gather_df["intersect_bp"]) + g_intersect_bp = set(sourmash_gather_df["intersect_bp"]) assert fg_intersect_bp == g_intersect_bp == set([4400000, 4100000, 2200000]) - fg_f_orig_query = set([round(x,4) for x in gather_df['f_orig_query']]) - g_f_orig_query = set([round(x,4) for x in sourmash_gather_df['f_orig_query']]) + fg_f_orig_query = set([round(x, 4) for x in gather_df["f_orig_query"]]) + g_f_orig_query = set([round(x, 4) for x in sourmash_gather_df["f_orig_query"]]) assert fg_f_orig_query == g_f_orig_query == set([0.0098, 0.0105, 0.0052]) - fg_f_match = set([round(x,4) for x in gather_df['f_match']]) - g_f_match = set([round(x,4) for x in sourmash_gather_df['f_match']]) + fg_f_match = set([round(x, 4) for x in gather_df["f_match"]]) + g_f_match = set([round(x, 4) for x in sourmash_gather_df["f_match"]]) assert fg_f_match == g_f_match == set([0.439, 1.0]) - fg_f_unique_to_query = set([round(x,3) for x in gather_df['f_unique_to_query']]) # rounding to 4 --> slightly different! - g_f_unique_to_query = set([round(x,3) for x in sourmash_gather_df['f_unique_to_query']]) + fg_f_unique_to_query = set( + [round(x, 3) for x in gather_df["f_unique_to_query"]] + ) # rounding to 4 --> slightly different! 
+ g_f_unique_to_query = set( + [round(x, 3) for x in sourmash_gather_df["f_unique_to_query"]] + ) assert fg_f_unique_to_query == g_f_unique_to_query == set([0.004, 0.01, 0.005]) - fg_f_unique_weighted = set([round(x,4) for x in gather_df['f_unique_weighted']]) - g_f_unique_weighted = set([round(x,4) for x in sourmash_gather_df['f_unique_weighted']]) - assert fg_f_unique_weighted== g_f_unique_weighted == set([0.0063, 0.002, 0.0062]) - - fg_average_abund = set([round(x,4) for x in gather_df['average_abund']]) - g_average_abund = set([round(x,4) for x in sourmash_gather_df['average_abund']]) - assert fg_average_abund== g_average_abund == set([8.2222, 10.3864, 21.0455]) - - fg_median_abund = set([round(x,4) for x in gather_df['median_abund']]) - g_median_abund = set([round(x,4) for x in sourmash_gather_df['median_abund']]) - assert fg_median_abund== g_median_abund == set([8.0, 10.5, 21.5]) - - fg_std_abund = set([round(x,4) for x in gather_df['std_abund']]) - g_std_abund = set([round(x,4) for x in sourmash_gather_df['std_abund']]) - assert fg_std_abund== g_std_abund == set([3.172, 5.6446, 6.9322]) - - g_match_filename_basename = [os.path.basename(filename) for filename in sourmash_gather_df['filename']] - fg_match_filename_basename = [os.path.basename(filename) for filename in gather_df['match_filename']] - assert all([x in fg_match_filename_basename for x in ['2.fa.sig.gz', '63.fa.sig.gz', '47.fa.sig.gz']]) + fg_f_unique_weighted = set([round(x, 4) for x in gather_df["f_unique_weighted"]]) + g_f_unique_weighted = set( + [round(x, 4) for x in sourmash_gather_df["f_unique_weighted"]] + ) + assert fg_f_unique_weighted == g_f_unique_weighted == set([0.0063, 0.002, 0.0062]) + + fg_average_abund = set([round(x, 4) for x in gather_df["average_abund"]]) + g_average_abund = set([round(x, 4) for x in sourmash_gather_df["average_abund"]]) + assert fg_average_abund == g_average_abund == set([8.2222, 10.3864, 21.0455]) + + fg_median_abund = set([round(x, 4) for x in gather_df["median_abund"]]) + g_median_abund = set([round(x, 4) for x in sourmash_gather_df["median_abund"]]) + assert fg_median_abund == g_median_abund == set([8.0, 10.5, 21.5]) + + fg_std_abund = set([round(x, 4) for x in gather_df["std_abund"]]) + g_std_abund = set([round(x, 4) for x in sourmash_gather_df["std_abund"]]) + assert fg_std_abund == g_std_abund == set([3.172, 5.6446, 6.9322]) + + g_match_filename_basename = [ + os.path.basename(filename) for filename in sourmash_gather_df["filename"] + ] + fg_match_filename_basename = [ + os.path.basename(filename) for filename in gather_df["match_filename"] + ] + assert all( + [ + x in fg_match_filename_basename + for x in ["2.fa.sig.gz", "63.fa.sig.gz", "47.fa.sig.gz"] + ] + ) assert fg_match_filename_basename == g_match_filename_basename - assert list(sourmash_gather_df['name']) == list(gather_df['match_name']) - assert list(sourmash_gather_df['md5']) == list(gather_df['match_md5']) + assert list(sourmash_gather_df["name"]) == list(gather_df["match_name"]) + assert list(sourmash_gather_df["md5"]) == list(gather_df["match_md5"]) - fg_f_match_orig = set([round(x,4) for x in gather_df['f_match_orig']]) - g_f_match_orig = set([round(x,4) for x in sourmash_gather_df['f_match_orig']]) + fg_f_match_orig = set([round(x, 4) for x in gather_df["f_match_orig"]]) + g_f_match_orig = set([round(x, 4) for x in sourmash_gather_df["f_match_orig"]]) assert fg_f_match_orig == g_f_match_orig == set([1.0]) - fg_unique_intersect_bp = set(gather_df['unique_intersect_bp']) - g_unique_intersect_bp = 
set(sourmash_gather_df['unique_intersect_bp'])
-    assert fg_unique_intersect_bp == g_unique_intersect_bp == set([4400000, 1800000, 2200000])
+    fg_unique_intersect_bp = set(gather_df["unique_intersect_bp"])
+    g_unique_intersect_bp = set(sourmash_gather_df["unique_intersect_bp"])
+    assert (
+        fg_unique_intersect_bp
+        == g_unique_intersect_bp
+        == set([4400000, 1800000, 2200000])
+    )
+
+    fg_gather_result_rank = set(gather_df["gather_result_rank"])
+    g_gather_result_rank = set(sourmash_gather_df["gather_result_rank"])
+    assert fg_gather_result_rank == g_gather_result_rank == set([0, 1, 2])

-    fg_gather_result_rank= set(gather_df['gather_result_rank'])
-    g_gather_result_rank = set(sourmash_gather_df['gather_result_rank'])
-    assert fg_gather_result_rank == g_gather_result_rank == set([0,1,2])
-
-    fg_remaining_bp = list(gather_df['remaining_bp'])
+    fg_remaining_bp = list(gather_df["remaining_bp"])
     assert fg_remaining_bp == [415600000, 413400000, 411600000]
     ### sourmash gather's remaining_bp does not match; the fastgather values
     ### here appear to be the correct ones.
-    #g_remaining_bp = list(sourmash_gather_df['remaining_bp'])
-    #print("gather remaining bp: ", g_remaining_bp) #{4000000, 0, 1800000}
+    # g_remaining_bp = list(sourmash_gather_df['remaining_bp'])
+    # print("gather remaining bp: ", g_remaining_bp) #{4000000, 0, 1800000}
     # assert fg_remaining_bp == g_remaining_bp == set([])
-
-    fg_query_containment_ani = set([round(x,3) for x in gather_df['query_containment_ani']])
-    g_query_containment_ani = set([round(x,3) for x in sourmash_gather_df['query_containment_ani']])
+
+    fg_query_containment_ani = set(
+        [round(x, 3) for x in gather_df["query_containment_ani"]]
+    )
+    g_query_containment_ani = set(
+        [round(x, 3) for x in sourmash_gather_df["query_containment_ani"]]
+    )
     assert fg_query_containment_ani == {0.844, 0.861, 0.863}
     # sourmash gather's cANI values are NaN here -- perhaps because the
     # sketches are too small?
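    # (assuming sourmash's usual containment-to-ANI point estimate,
    # ANI ~= containment ** (1 / ksize): at k=31, 0.0105 ** (1/31) ~= 0.863
    # and 0.0052 ** (1/31) ~= 0.844, matching the f_orig_query values above)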
# assert fg_query_containment_ani == g_query_containment_ani == set([0.8632, 0.8444, 0.8391]) print("fg qcANI: ", fg_query_containment_ani) print("g_qcANI: ", g_query_containment_ani) - fg_n_unique_weighted_found= set(gather_df['n_unique_weighted_found']) - g_n_unique_weighted_found = set(sourmash_gather_df['n_unique_weighted_found']) - assert fg_n_unique_weighted_found == g_n_unique_weighted_found == set([457, 148, 463]) + fg_n_unique_weighted_found = set(gather_df["n_unique_weighted_found"]) + g_n_unique_weighted_found = set(sourmash_gather_df["n_unique_weighted_found"]) + assert ( + fg_n_unique_weighted_found == g_n_unique_weighted_found == set([457, 148, 463]) + ) - fg_sum_weighted_found= set(gather_df['sum_weighted_found']) - g_sum_weighted_found = set(sourmash_gather_df['sum_weighted_found']) + fg_sum_weighted_found = set(gather_df["sum_weighted_found"]) + g_sum_weighted_found = set(sourmash_gather_df["sum_weighted_found"]) assert fg_sum_weighted_found == g_sum_weighted_found == set([920, 457, 1068]) - - fg_total_weighted_hashes= set(gather_df['total_weighted_hashes']) - g_total_weighted_hashes = set(sourmash_gather_df['total_weighted_hashes']) + + fg_total_weighted_hashes = set(gather_df["total_weighted_hashes"]) + g_total_weighted_hashes = set(sourmash_gather_df["total_weighted_hashes"]) assert fg_total_weighted_hashes == g_total_weighted_hashes == set([73489]) diff --git a/src/python/tests/test_fastmultigather.py b/src/python/tests/test_fastmultigather.py index 643799b9..a21de63e 100644 --- a/src/python/tests/test_fastmultigather.py +++ b/src/python/tests/test_fastmultigather.py @@ -1,6 +1,7 @@ """ Test 'sourmash scripts fastmultigather' """ + import os import pytest import pandas @@ -8,170 +9,253 @@ import sourmash from . import sourmash_tst_utils as utils -from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist, - index_siglist) +from .sourmash_tst_utils import ( + get_test_data, + make_file_list, + zip_siglist, + index_siglist, +) def test_installed(runtmp): with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'fastmultigather') + runtmp.sourmash("scripts", "fastmultigather") - assert 'usage: fastmultigather' in runtmp.last_result.err + assert "usage: fastmultigather" in runtmp.last_result.err def test_simple(runtmp, zip_against): # test basic execution! 
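    # flow: write query/against path lists, run 'sourmash scripts
    # fastmultigather', then check the per-query SRR606249.gather.csv and
    # SRR606249.prefetch.csv outputs written to the working directory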
- query = get_test_data('SRR606249.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + query = get_test_data("SRR606249.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") make_file_list(query_list, [query]) make_file_list(against_list, [sig2, sig47, sig63]) if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) - - - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, - '-s', '100000', '-t', '0', in_directory=runtmp.output('')) - - print(os.listdir(runtmp.output(''))) - - g_output = runtmp.output('SRR606249.gather.csv') - p_output = runtmp.output('SRR606249.prefetch.csv') + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) + + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + against_list, + "-s", + "100000", + "-t", + "0", + in_directory=runtmp.output(""), + ) + + print(os.listdir(runtmp.output(""))) + + g_output = runtmp.output("SRR606249.gather.csv") + p_output = runtmp.output("SRR606249.prefetch.csv") assert os.path.exists(p_output) # check prefetch output (only non-indexed gather) df = pandas.read_csv(p_output) assert len(df) == 3 keys = set(df.keys()) - assert keys == {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'} + assert keys == { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + } assert os.path.exists(g_output) df = pandas.read_csv(g_output) print(df) assert len(df) == 3 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp', 'gather_result_rank'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + "gather_result_rank", + }.issubset(keys) def test_simple_list_of_zips(runtmp): # test basic execution! 
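    # here each against entry is itself a .sig.zip archive, so the path list
    # mixes zip containers rather than bare .sig.gz signature files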
- query = get_test_data('SRR606249.sig.gz') - sig2 = get_test_data('2.sig.zip') - sig47 = get_test_data('47.sig.zip') - sig63 = get_test_data('63.sig.zip') + query = get_test_data("SRR606249.sig.gz") + sig2 = get_test_data("2.sig.zip") + sig47 = get_test_data("47.sig.zip") + sig63 = get_test_data("63.sig.zip") - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") make_file_list(query_list, [query]) make_file_list(against_list, [sig2, sig47, sig63]) cwd = os.getcwd() try: - os.chdir(runtmp.output('')) - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, - '-s', '100000', '-t', '0') + os.chdir(runtmp.output("")) + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + against_list, + "-s", + "100000", + "-t", + "0", + ) finally: os.chdir(cwd) - print(os.listdir(runtmp.output(''))) + print(os.listdir(runtmp.output(""))) - g_output = runtmp.output('SRR606249.gather.csv') - p_output = runtmp.output('SRR606249.prefetch.csv') + g_output = runtmp.output("SRR606249.gather.csv") + p_output = runtmp.output("SRR606249.prefetch.csv") assert os.path.exists(p_output) # check prefetch output (only non-indexed gather) df = pandas.read_csv(p_output) assert len(df) == 3 keys = set(df.keys()) - assert keys == {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'} + assert keys == { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + } assert os.path.exists(g_output) df = pandas.read_csv(g_output) print(df) assert len(df) == 3 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp', 'gather_result_rank'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + "gather_result_rank", + }.issubset(keys) def test_simple_space_in_signame(runtmp): # test basic execution! 
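    # fastmultigather names its output CSVs after the query signature,
    # truncated at the first space -- hence 'my-favorite-signame.gather.csv'
    # below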
- query = get_test_data('SRR606249.sig.gz') - renamed_query = runtmp.output('in.zip') - name = 'my-favorite-signame has spaces' + query = get_test_data("SRR606249.sig.gz") + renamed_query = runtmp.output("in.zip") + name = "my-favorite-signame has spaces" # rename signature - runtmp.sourmash('sig', 'rename', query, name, '-o', renamed_query) + runtmp.sourmash("sig", "rename", query, name, "-o", renamed_query) - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") make_file_list(against_list, [sig2, sig47, sig63]) - runtmp.sourmash('scripts', 'fastmultigather', renamed_query, against_list, - '-s', '100000', '-t', '0', in_directory=runtmp.output('')) - - print(os.listdir(runtmp.output(''))) - - g_output = runtmp.output('my-favorite-signame.gather.csv') - p_output = runtmp.output('my-favorite-signame.prefetch.csv') + runtmp.sourmash( + "scripts", + "fastmultigather", + renamed_query, + against_list, + "-s", + "100000", + "-t", + "0", + in_directory=runtmp.output(""), + ) + + print(os.listdir(runtmp.output(""))) + + g_output = runtmp.output("my-favorite-signame.gather.csv") + p_output = runtmp.output("my-favorite-signame.prefetch.csv") assert os.path.exists(p_output) assert os.path.exists(g_output) def test_simple_zip_query(runtmp): # test basic execution! - query = get_test_data('SRR606249.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + query = get_test_data("SRR606249.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") make_file_list(query_list, [query]) make_file_list(against_list, [sig2, sig47, sig63]) - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, - '-s', '100000', '-t', '0', in_directory=runtmp.output('') ) + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + against_list, + "-s", + "100000", + "-t", + "0", + in_directory=runtmp.output(""), + ) - print(os.listdir(runtmp.output(''))) + print(os.listdir(runtmp.output(""))) - g_output = runtmp.output('SRR606249.gather.csv') - p_output = runtmp.output('SRR606249.prefetch.csv') + g_output = runtmp.output("SRR606249.gather.csv") + p_output = runtmp.output("SRR606249.prefetch.csv") # check prefetch output (only non-indexed gather) assert os.path.exists(p_output) df = pandas.read_csv(p_output) assert len(df) == 3 keys = set(df.keys()) - assert keys == {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'} + assert keys == { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + } assert os.path.exists(g_output) df = pandas.read_csv(g_output) assert len(df) == 3 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp', 'gather_result_rank'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + 
"match_name", + "match_md5", + "intersect_bp", + "gather_result_rank", + }.issubset(keys) def test_simple_read_manifests(runtmp): # test basic execution! - query = get_test_data('SRR606249.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + query = get_test_data("SRR606249.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") against_mf = runtmp.output("against.csv") query_mf = runtmp.output("query.csv") @@ -180,148 +264,263 @@ def test_simple_read_manifests(runtmp): runtmp.sourmash("sig", "manifest", query, "-o", query_mf) runtmp.sourmash("sig", "manifest", against_list, "-o", against_mf) - runtmp.sourmash('scripts', 'fastmultigather', query_mf, against_list, - '-s', '100000', '-t', '0', in_directory=runtmp.output('')) + runtmp.sourmash( + "scripts", + "fastmultigather", + query_mf, + against_list, + "-s", + "100000", + "-t", + "0", + in_directory=runtmp.output(""), + ) - print(os.listdir(runtmp.output(''))) + print(os.listdir(runtmp.output(""))) - g_output = runtmp.output('SRR606249.gather.csv') - p_output = runtmp.output('SRR606249.prefetch.csv') + g_output = runtmp.output("SRR606249.gather.csv") + p_output = runtmp.output("SRR606249.prefetch.csv") # check prefetch output (only non-indexed gather) assert os.path.exists(p_output) df = pandas.read_csv(p_output) assert len(df) == 3 keys = set(df.keys()) - assert keys == {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'} + assert keys == { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + } assert os.path.exists(g_output) df = pandas.read_csv(g_output) assert len(df) == 3 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp', 'gather_result_rank'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + "gather_result_rank", + }.issubset(keys) def test_simple_indexed(runtmp, zip_query, toggle_internal_storage): # test basic execution! 
- query = get_test_data('SRR606249.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + query = get_test_data("SRR606249.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") make_file_list(query_list, [query]) make_file_list(against_list, [sig2, sig47, sig63]) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) - - g_output = runtmp.output('out.csv') - against_db = index_siglist(runtmp, against_list, runtmp.output('test.rocksdb'), toggle_internal_storage=toggle_internal_storage) - runtmp.sourmash('scripts', 'fastmultigather', query_list, - against_db, '-s', '100000', '-t', '0', - '-o', g_output) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) + + g_output = runtmp.output("out.csv") + against_db = index_siglist( + runtmp, + against_list, + runtmp.output("test.rocksdb"), + toggle_internal_storage=toggle_internal_storage, + ) + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + against_db, + "-s", + "100000", + "-t", + "0", + "-o", + g_output, + ) assert os.path.exists(g_output) df = pandas.read_csv(g_output) assert len(df) == 3 keys = set(df.keys()) - expected_keys = {'match_name', 'query_filename', 'query_n_hashes', 'match_filename', 'f_match_orig', - 'query_bp', 'query_abundance', 'match_containment_ani', 'intersect_bp', 'total_weighted_hashes', - 'n_unique_weighted_found', 'query_name', 'gather_result_rank', 'moltype', - 'query_containment_ani', 'sum_weighted_found', 'f_orig_query', 'ksize', 'max_containment_ani', - 'std_abund', 'scaled', 'average_containment_ani', 'f_match', 'f_unique_to_query', - 'average_abund', 'unique_intersect_bp', 'median_abund', 'query_md5', 'match_md5', 'remaining_bp', - 'f_unique_weighted'} - assert keys == expected_keys + expected_keys = { + "match_name", + "query_filename", + "query_n_hashes", + "match_filename", + "f_match_orig", + "query_bp", + "query_abundance", + "match_containment_ani", + "intersect_bp", + "total_weighted_hashes", + "n_unique_weighted_found", + "query_name", + "gather_result_rank", + "moltype", + "query_containment_ani", + "sum_weighted_found", + "f_orig_query", + "ksize", + "max_containment_ani", + "std_abund", + "scaled", + "average_containment_ani", + "f_match", + "f_unique_to_query", + "average_abund", + "unique_intersect_bp", + "median_abund", + "query_md5", + "match_md5", + "remaining_bp", + "f_unique_weighted", + } + assert keys == expected_keys def test_simple_indexed_query_manifest(runtmp, toggle_internal_storage): # test basic execution! 
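    # as above, but the query is supplied as a manifest CSV generated with
    # 'sourmash sig manifest' instead of a plain path list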
- query = get_test_data('SRR606249.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + query = get_test_data("SRR606249.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_mf = runtmp.output('query.csv') - against_list = runtmp.output('against.txt') + query_mf = runtmp.output("query.csv") + against_list = runtmp.output("against.txt") make_file_list(against_list, [sig2, sig47, sig63]) runtmp.sourmash("sig", "manifest", query, "-o", query_mf) - g_output = runtmp.output('out.csv') - against_db = index_siglist(runtmp, against_list, runtmp.output('db'), - toggle_internal_storage=toggle_internal_storage) - runtmp.sourmash('scripts', 'fastmultigather', query_mf, - against_db, '-s', '100000', '-t', '0', - '-o', g_output) + g_output = runtmp.output("out.csv") + against_db = index_siglist( + runtmp, + against_list, + runtmp.output("db"), + toggle_internal_storage=toggle_internal_storage, + ) + runtmp.sourmash( + "scripts", + "fastmultigather", + query_mf, + against_db, + "-s", + "100000", + "-t", + "0", + "-o", + g_output, + ) assert os.path.exists(g_output) df = pandas.read_csv(g_output) assert len(df) == 3 keys = set(df.keys()) - expected_keys = {'match_name', 'query_filename', 'query_n_hashes', 'match_filename', 'f_match_orig', - 'query_bp', 'query_abundance', 'match_containment_ani', 'intersect_bp', 'total_weighted_hashes', - 'n_unique_weighted_found', 'query_name', 'gather_result_rank', 'moltype', - 'query_containment_ani', 'sum_weighted_found', 'f_orig_query', 'ksize', 'max_containment_ani', - 'std_abund', 'scaled', 'average_containment_ani', 'f_match', 'f_unique_to_query', - 'average_abund', 'unique_intersect_bp', 'median_abund', 'query_md5', 'match_md5', 'remaining_bp', - 'f_unique_weighted'} - assert keys == expected_keys + expected_keys = { + "match_name", + "query_filename", + "query_n_hashes", + "match_filename", + "f_match_orig", + "query_bp", + "query_abundance", + "match_containment_ani", + "intersect_bp", + "total_weighted_hashes", + "n_unique_weighted_found", + "query_name", + "gather_result_rank", + "moltype", + "query_containment_ani", + "sum_weighted_found", + "f_orig_query", + "ksize", + "max_containment_ani", + "std_abund", + "scaled", + "average_containment_ani", + "f_match", + "f_unique_to_query", + "average_abund", + "unique_intersect_bp", + "median_abund", + "query_md5", + "match_md5", + "remaining_bp", + "f_unique_weighted", + } + assert keys == expected_keys def test_missing_querylist(runtmp, capfd, indexed, zip_query, toggle_internal_storage): # test missing querylist - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") if zip_query: - query_list = runtmp.output('query.zip') + query_list = runtmp.output("query.zip") # do not make query_list! 
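    # (the query path deliberately never exists on disk, so the command is
    # expected to fail with 'Error: No such file or directory')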
make_file_list(against_list, [sig2, sig47, sig63]) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db'), - toggle_internal_storage=toggle_internal_storage) + against_list = index_siglist( + runtmp, + against_list, + runtmp.output("db"), + toggle_internal_storage=toggle_internal_storage, + ) with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, - '-s', '100000') + runtmp.sourmash( + "scripts", "fastmultigather", query_list, against_list, "-s", "100000" + ) captured = capfd.readouterr() print(captured.err) - assert 'Error: No such file or directory' in captured.err + assert "Error: No such file or directory" in captured.err def test_sig_query(runtmp, capfd, indexed): # sig file is now fine as a query - query = get_test_data('SRR606249.sig.gz') + query = get_test_data("SRR606249.sig.gz") - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, sig47, sig63]) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) - g_output = runtmp.output('out.csv') - output_params = ['-o', g_output] + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) + g_output = runtmp.output("out.csv") + output_params = ["-o", g_output] else: - g_output = runtmp.output('SRR606249.gather.csv') - p_output = runtmp.output('SRR606249.prefetch.csv') + g_output = runtmp.output("SRR606249.gather.csv") + p_output = runtmp.output("SRR606249.prefetch.csv") output_params = [] - runtmp.sourmash('scripts', 'fastmultigather', query, against_list, - '-s', '100000', *output_params) + runtmp.sourmash( + "scripts", + "fastmultigather", + query, + against_list, + "-s", + "100000", + *output_params, + ) captured = capfd.readouterr() print(captured.err) @@ -331,7 +530,14 @@ def test_sig_query(runtmp, capfd, indexed): df = pandas.read_csv(p_output) assert len(df) == 3 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + }.issubset(keys) # check gather output (both) assert os.path.exists(g_output) @@ -339,32 +545,47 @@ def test_sig_query(runtmp, capfd, indexed): assert len(df) == 3 keys = set(df.keys()) if indexed: - assert {'query_name', 'query_md5', 'match_name', 'match_md5', 'f_match', 'intersect_bp'}.issubset(keys) + assert { + "query_name", + "query_md5", + "match_name", + "match_md5", + "f_match", + "intersect_bp", + }.issubset(keys) else: - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "gather_result_rank", + "intersect_bp", + }.issubset(keys) def test_bad_query(runtmp, capfd, indexed): # test with a bad query (a .sig.gz file renamed as zip file) - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = 
get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_zip = runtmp.output('query.zip') + query_zip = runtmp.output("query.zip") # cp sig2 into query_zip - with open(query_zip, 'wb') as fp: - with open(sig2, 'rb') as fp2: + with open(query_zip, "wb") as fp: + with open(sig2, "rb") as fp2: fp.write(fp2.read()) make_file_list(against_list, [sig2, sig47, sig63]) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'fastmultigather', query_zip, against_list) + runtmp.sourmash("scripts", "fastmultigather", query_zip, against_list) captured = capfd.readouterr() print(captured.err) @@ -374,21 +595,22 @@ def test_bad_query(runtmp, capfd, indexed): def test_missing_query(runtmp, capfd, indexed): # test missing query - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - make_file_list(query_list, [sig2, 'no-exist']) + make_file_list(query_list, [sig2, "no-exist"]) make_file_list(against_list, [sig2, sig47, sig63]) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, - '-s', '100000') + runtmp.sourmash( + "scripts", "fastmultigather", query_list, against_list, "-s", "100000" + ) captured = capfd.readouterr() print(captured.err) @@ -398,24 +620,25 @@ def test_missing_query(runtmp, capfd, indexed): def test_nomatch_query(runtmp, capfd, indexed, zip_query): # test nomatch file in querylist - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') - badsig1 = get_test_data('1.fa.k21.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") + badsig1 = get_test_data("1.fa.k21.sig.gz") make_file_list(query_list, [sig2, badsig1]) make_file_list(against_list, [sig2, sig47, sig63]) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, - '-s', '100000') + runtmp.sourmash( + "scripts", "fastmultigather", query_list, against_list, "-s", "100000" + ) captured = capfd.readouterr() print(captured.err) @@ -424,40 +647,40 @@ def test_nomatch_query(runtmp, capfd, indexed, zip_query): def test_missing_against(runtmp, capfd, zip_against): # test missing against - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = 
get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) if zip_against: - against_list = runtmp.output('against.zip') + against_list = runtmp.output("against.zip") # do not make against_list with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, - '-s', '100000') + runtmp.sourmash( + "scripts", "fastmultigather", query_list, against_list, "-s", "100000" + ) captured = capfd.readouterr() print(captured.err) - assert 'Error: No such file or directory' in captured.err + assert "Error: No such file or directory" in captured.err def test_sig_against(runtmp, capfd): # against file can be a sig now - query = get_test_data('SRR606249.sig.gz') - against_list = runtmp.output('against.txt') + query = get_test_data("SRR606249.sig.gz") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") - g_output = runtmp.output('SRR606249.gather.csv') - p_output = runtmp.output('SRR606249.prefetch.csv') - runtmp.sourmash('scripts', 'fastmultigather', query, sig2, - '-s', '100000') + g_output = runtmp.output("SRR606249.gather.csv") + p_output = runtmp.output("SRR606249.prefetch.csv") + runtmp.sourmash("scripts", "fastmultigather", query, sig2, "-s", "100000") captured = capfd.readouterr() print(captured.err) @@ -467,75 +690,96 @@ def test_sig_against(runtmp, capfd): df = pandas.read_csv(p_output) assert len(df) == 1 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + }.issubset(keys) # check gather output assert os.path.exists(g_output) df = pandas.read_csv(g_output) assert len(df) == 1 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp', 'gather_result_rank'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + "gather_result_rank", + }.issubset(keys) def test_bad_against(runtmp, capfd): # test bad 'against' file - in this case, one containing a nonexistent file - query = get_test_data('SRR606249.sig.gz') - query_list = runtmp.output('query.txt') + query = get_test_data("SRR606249.sig.gz") + query_list = runtmp.output("query.txt") make_file_list(query_list, [query]) - against_list = runtmp.output('against.txt') - sig2 = get_test_data('2.fa.sig.gz') + against_list = runtmp.output("against.txt") + sig2 = get_test_data("2.fa.sig.gz") make_file_list(against_list, [sig2, "no exist"]) # should succeed, but with error output. - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, - '-s', '100000') + runtmp.sourmash( + "scripts", "fastmultigather", query_list, against_list, "-s", "100000" + ) captured = capfd.readouterr() print(captured.err) assert "WARNING: could not load sketches from path 'no exist'" in captured.err - assert "WARNING: 1 search paths failed to load. See error messages above." in captured.err + assert ( + "WARNING: 1 search paths failed to load. See error messages above." 
+ in captured.err + ) def test_bad_against_2(runtmp, capfd, zip_query): # test with a bad against (a .sig.gz file renamed as zip file) - query = get_test_data('SRR606249.sig.gz') - query_list = runtmp.output('query.txt') + query = get_test_data("SRR606249.sig.gz") + query_list = runtmp.output("query.txt") make_file_list(query_list, [query]) - sig2 = get_test_data('2.fa.sig.gz') - against_zip = runtmp.output('against.zip') + sig2 = get_test_data("2.fa.sig.gz") + against_zip = runtmp.output("against.zip") # cp sig2 into query_zip - with open(against_zip, 'wb') as fp: - with open(sig2, 'rb') as fp2: + with open(against_zip, "wb") as fp: + with open(sig2, "rb") as fp2: fp.write(fp2.read()) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_zip, - '-s', '100000') + runtmp.sourmash( + "scripts", "fastmultigather", query_list, against_zip, "-s", "100000" + ) captured = capfd.readouterr() print(captured.err) - assert 'InvalidArchive' in captured.err + assert "InvalidArchive" in captured.err def test_empty_against(runtmp, capfd): # test bad 'against' file - in this case, an empty one - query = get_test_data('SRR606249.sig.gz') - query_list = runtmp.output('query.txt') + query = get_test_data("SRR606249.sig.gz") + query_list = runtmp.output("query.txt") make_file_list(query_list, [query]) - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") make_file_list(against_list, []) with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, - '-s', '100000') + runtmp.sourmash( + "scripts", "fastmultigather", query_list, against_list, "-s", "100000" + ) captured = capfd.readouterr() print(captured.err) @@ -546,60 +790,77 @@ def test_empty_against(runtmp, capfd): def test_nomatch_in_against(runtmp, capfd, zip_against): # test an against file that has a non-matching ksize sig in it - query = get_test_data('SRR606249.sig.gz') - query_list = runtmp.output('query.txt') + query = get_test_data("SRR606249.sig.gz") + query_list = runtmp.output("query.txt") make_file_list(query_list, [query]) - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig1 = get_test_data('1.fa.k21.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig1 = get_test_data("1.fa.k21.sig.gz") make_file_list(against_list, [sig2, sig1]) if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, - '-s', '100000') + runtmp.sourmash( + "scripts", "fastmultigather", query_list, against_list, "-s", "100000" + ) captured = capfd.readouterr() print(captured.err) - assert 'WARNING: skipped 1 search paths - no compatible signatures.' in captured.err + assert "WARNING: skipped 1 search paths - no compatible signatures." 
in captured.err def test_md5(runtmp, zip_query): # test correct md5s present in output - query = get_test_data('SRR606249.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + query = get_test_data("SRR606249.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") make_file_list(query_list, [query]) make_file_list(against_list, [sig2, sig47, sig63]) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, - '-s', '100000', '-t', '0', in_directory=runtmp.output('')) + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + against_list, + "-s", + "100000", + "-t", + "0", + in_directory=runtmp.output(""), + ) - print(os.listdir(runtmp.output(''))) + print(os.listdir(runtmp.output(""))) - g_output = runtmp.output('SRR606249.gather.csv') - p_output = runtmp.output('SRR606249.prefetch.csv') + g_output = runtmp.output("SRR606249.gather.csv") + p_output = runtmp.output("SRR606249.prefetch.csv") # check prefetch output (only non-indexed gather) assert os.path.exists(p_output) df = pandas.read_csv(p_output) assert len(df) == 3 keys = set(df.keys()) - assert keys == {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'} - - md5s = set(df['match_md5']) + assert keys == { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + } + + md5s = set(df["match_md5"]) for against_file in (sig2, sig47, sig63): for ss in sourmash.load_file_as_signatures(against_file, ksize=31): assert ss.md5sum() in md5s @@ -609,9 +870,16 @@ def test_md5(runtmp, zip_query): df = pandas.read_csv(g_output) assert len(df) == 3 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'}.issubset(keys) - - md5s = set(df['match_md5']) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + }.issubset(keys) + + md5s = set(df["match_md5"]) for against_file in (sig2, sig47, sig63): for ss in sourmash.load_file_as_signatures(against_file, ksize=31): assert ss.md5sum() in md5s @@ -619,41 +887,76 @@ def test_md5(runtmp, zip_query): def test_md5_indexed(runtmp, zip_query): # test correct md5s present in output - query = get_test_data('SRR606249.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + query = get_test_data("SRR606249.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") make_file_list(query_list, [query]) make_file_list(against_list, [sig2, sig47, sig63]) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) - - g_output = runtmp.output('out.csv') - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) - runtmp.sourmash('scripts', 'fastmultigather', query_list, 
- against_list, '-s', '100000', '-t', '0', - '-o', g_output) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) + + g_output = runtmp.output("out.csv") + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + against_list, + "-s", + "100000", + "-t", + "0", + "-o", + g_output, + ) # check gather output (mostly same for indexed vs non-indexed version) assert os.path.exists(g_output) df = pandas.read_csv(g_output) assert len(df) == 3 keys = set(df.keys()) - expected_keys = {'match_name', 'query_filename', 'query_n_hashes', 'match_filename', 'f_match_orig', - 'query_bp', 'query_abundance', 'match_containment_ani', 'intersect_bp', 'total_weighted_hashes', - 'n_unique_weighted_found', 'query_name', 'gather_result_rank', 'moltype', - 'query_containment_ani', 'sum_weighted_found', 'f_orig_query', 'ksize', 'max_containment_ani', - 'std_abund', 'scaled', 'average_containment_ani', 'f_match', 'f_unique_to_query', - 'average_abund', 'unique_intersect_bp', 'median_abund', 'query_md5', 'match_md5', 'remaining_bp', - 'f_unique_weighted'} + expected_keys = { + "match_name", + "query_filename", + "query_n_hashes", + "match_filename", + "f_match_orig", + "query_bp", + "query_abundance", + "match_containment_ani", + "intersect_bp", + "total_weighted_hashes", + "n_unique_weighted_found", + "query_name", + "gather_result_rank", + "moltype", + "query_containment_ani", + "sum_weighted_found", + "f_orig_query", + "ksize", + "max_containment_ani", + "std_abund", + "scaled", + "average_containment_ani", + "f_match", + "f_unique_to_query", + "average_abund", + "unique_intersect_bp", + "median_abund", + "query_md5", + "match_md5", + "remaining_bp", + "f_unique_weighted", + } assert keys == expected_keys - md5s = set(df['match_md5']) + md5s = set(df["match_md5"]) for against_file in (sig2, sig47, sig63): for ss in sourmash.load_file_as_signatures(against_file, ksize=31): assert ss.md5sum() in md5s @@ -661,147 +964,266 @@ def test_md5_indexed(runtmp, zip_query): def test_csv_columns_vs_sourmash_prefetch(runtmp, zip_query, zip_against): # the column names should be strict subsets of sourmash prefetch cols - query = get_test_data('SRR606249.sig.gz') + query = get_test_data("SRR606249.sig.gz") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") make_file_list(query_list, [query]) - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") make_file_list(against_list, [sig2, sig47, sig63]) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) - - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, - '-s', '100000', '-t', '0', in_directory=runtmp.output('')) - - g_output = runtmp.output('SRR606249.gather.csv') - p_output = runtmp.output('SRR606249.prefetch.csv') + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) + + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + against_list, + "-s", + "100000", + "-t", + "0", + in_directory=runtmp.output(""), + ) + 
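+    # in_directory runs the command from the temp dir; non-indexed
+    # fastmultigather writes its per-query CSVs into the working directory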
+ g_output = runtmp.output("SRR606249.gather.csv") + p_output = runtmp.output("SRR606249.prefetch.csv") assert os.path.exists(p_output) assert os.path.exists(g_output) # now run sourmash prefetch - sp_output = runtmp.output('sourmash-prefetch.csv') - runtmp.sourmash('prefetch', query, against_list, - '-o', sp_output, '--scaled', '100000') + sp_output = runtmp.output("sourmash-prefetch.csv") + runtmp.sourmash( + "prefetch", query, against_list, "-o", sp_output, "--scaled", "100000" + ) gather_df = pandas.read_csv(g_output) g_keys = set(gather_df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(g_keys) - g_keys.remove('gather_result_rank') # 'rank' is not in sourmash prefetch! + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "gather_result_rank", + "intersect_bp", + }.issubset(g_keys) + g_keys.remove("gather_result_rank") # 'rank' is not in sourmash prefetch! sourmash_prefetch_df = pandas.read_csv(sp_output) sp_keys = set(sourmash_prefetch_df.keys()) print(g_keys - sp_keys) diff_keys = g_keys - sp_keys - assert diff_keys == set(['remaining_bp', 'f_match_orig', 'f_unique_weighted', 'average_abund', 'unique_intersect_bp', 'std_abund', 'sum_weighted_found', 'median_abund', 'n_unique_weighted_found', 'f_unique_to_query', 'f_orig_query', 'total_weighted_hashes', 'f_match']) + assert diff_keys == set( + [ + "remaining_bp", + "f_match_orig", + "f_unique_weighted", + "average_abund", + "unique_intersect_bp", + "std_abund", + "sum_weighted_found", + "median_abund", + "n_unique_weighted_found", + "f_unique_to_query", + "f_orig_query", + "total_weighted_hashes", + "f_match", + ] + ) + def test_csv_columns_vs_sourmash_gather_fullresults(runtmp): # the column names should be identical to sourmash gather cols - query = get_test_data('SRR606249.sig.gz') + query = get_test_data("SRR606249.sig.gz") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") make_file_list(query_list, [query]) - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") make_file_list(against_list, [sig2, sig47, sig63]) - g_output = runtmp.output('SRR606249.gather.csv') - runtmp.sourmash('scripts', 'fastmultigather', query_list, - against_list, '-s', '100000', '-t', '0', - ) # '-o', g_output, + g_output = runtmp.output("SRR606249.gather.csv") + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + against_list, + "-s", + "100000", + "-t", + "0", + ) # '-o', g_output, assert os.path.exists(g_output) # now run sourmash gather - sg_output = runtmp.output('.csv') - runtmp.sourmash('gather', query, against_list, - '-o', sg_output, '--scaled', '100000') + sg_output = runtmp.output(".csv") + runtmp.sourmash( + "gather", query, against_list, "-o", sg_output, "--scaled", "100000" + ) gather_df = pandas.read_csv(g_output) g_keys = set(gather_df.keys()) - expected_keys = {'match_name', 'query_filename', 'query_n_hashes', 'match_filename', 'f_match_orig', - 'query_bp', 'query_abundance', 'match_containment_ani', 'intersect_bp', 'total_weighted_hashes', - 'n_unique_weighted_found', 'query_name', 'gather_result_rank', 'moltype', - 'query_containment_ani', 'sum_weighted_found', 'f_orig_query', 'ksize', 
'max_containment_ani', - 'std_abund', 'scaled', 'average_containment_ani', 'f_match', 'f_unique_to_query', - 'average_abund', 'unique_intersect_bp', 'median_abund', 'query_md5', 'match_md5', 'remaining_bp', - 'f_unique_weighted'} + expected_keys = { + "match_name", + "query_filename", + "query_n_hashes", + "match_filename", + "f_match_orig", + "query_bp", + "query_abundance", + "match_containment_ani", + "intersect_bp", + "total_weighted_hashes", + "n_unique_weighted_found", + "query_name", + "gather_result_rank", + "moltype", + "query_containment_ani", + "sum_weighted_found", + "f_orig_query", + "ksize", + "max_containment_ani", + "std_abund", + "scaled", + "average_containment_ani", + "f_match", + "f_unique_to_query", + "average_abund", + "unique_intersect_bp", + "median_abund", + "query_md5", + "match_md5", + "remaining_bp", + "f_unique_weighted", + } assert g_keys == expected_keys sourmash_gather_df = pandas.read_csv(sg_output) sg_keys = set(sourmash_gather_df.keys()) print(sg_keys) modified_keys = ["match_md5", "match_name", "match_filename"] - sg_keys.update(modified_keys) # fastmultigather is more explicit (match_md5 instead of md5, etc) - print('g_keys - sg_keys:', g_keys - sg_keys) + sg_keys.update( + modified_keys + ) # fastmultigather is more explicit (match_md5 instead of md5, etc) + print("g_keys - sg_keys:", g_keys - sg_keys) assert not g_keys - sg_keys, g_keys - sg_keys def test_csv_columns_vs_sourmash_gather_indexed(runtmp): # the column names should be identical to sourmash gather cols - query = get_test_data('SRR606249.sig.gz') + query = get_test_data("SRR606249.sig.gz") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") make_file_list(query_list, [query]) - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") make_file_list(against_list, [sig2, sig47, sig63]) - g_output = runtmp.output('out.csv') - against_db = index_siglist(runtmp, against_list, runtmp.output('db')) - runtmp.sourmash('scripts', 'fastmultigather', query_list, - against_db, '-s', '100000', '-t', '0', - '-o', g_output) + g_output = runtmp.output("out.csv") + against_db = index_siglist(runtmp, against_list, runtmp.output("db")) + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + against_db, + "-s", + "100000", + "-t", + "0", + "-o", + g_output, + ) assert os.path.exists(g_output) # now run sourmash gather - sg_output = runtmp.output('sourmash-gather.csv') - runtmp.sourmash('gather', query, against_list, - '-o', sg_output, '--scaled', '100000') + sg_output = runtmp.output("sourmash-gather.csv") + runtmp.sourmash( + "gather", query, against_list, "-o", sg_output, "--scaled", "100000" + ) gather_df = pandas.read_csv(g_output) g_keys = set(gather_df.keys()) - expected_keys = {'match_name', 'query_filename', 'query_n_hashes', 'match_filename', 'f_match_orig', - 'query_bp', 'query_abundance', 'match_containment_ani', 'intersect_bp', 'total_weighted_hashes', - 'n_unique_weighted_found', 'query_name', 'gather_result_rank', 'moltype', - 'query_containment_ani', 'sum_weighted_found', 'f_orig_query', 'ksize', 'max_containment_ani', - 'std_abund', 'scaled', 'average_containment_ani', 'f_match', 'f_unique_to_query', - 'average_abund', 'unique_intersect_bp', 'median_abund', 'query_md5', 'match_md5', 
'remaining_bp', - 'f_unique_weighted'} + expected_keys = { + "match_name", + "query_filename", + "query_n_hashes", + "match_filename", + "f_match_orig", + "query_bp", + "query_abundance", + "match_containment_ani", + "intersect_bp", + "total_weighted_hashes", + "n_unique_weighted_found", + "query_name", + "gather_result_rank", + "moltype", + "query_containment_ani", + "sum_weighted_found", + "f_orig_query", + "ksize", + "max_containment_ani", + "std_abund", + "scaled", + "average_containment_ani", + "f_match", + "f_unique_to_query", + "average_abund", + "unique_intersect_bp", + "median_abund", + "query_md5", + "match_md5", + "remaining_bp", + "f_unique_weighted", + } assert g_keys == expected_keys sourmash_gather_df = pandas.read_csv(sg_output) sg_keys = set(sourmash_gather_df.keys()) print(sg_keys) modified_keys = ["match_md5", "match_name", "match_filename"] - sg_keys.update(modified_keys) # fastmultigather is more explicit (match_md5 instead of md5, etc) - print('g_keys - sg_keys:', g_keys - sg_keys) + sg_keys.update( + modified_keys + ) # fastmultigather is more explicit (match_md5 instead of md5, etc) + print("g_keys - sg_keys:", g_keys - sg_keys) assert not g_keys - sg_keys, g_keys - sg_keys def test_simple_protein(runtmp): # test basic protein execution - sigs = get_test_data('protein.zip') + sigs = get_test_data("protein.zip") sig_names = ["GCA_001593935", "GCA_001593925"] - runtmp.sourmash('scripts', 'fastmultigather', sigs, sigs, - '-s', '100', '--moltype', 'protein', '-k', '19') + runtmp.sourmash( + "scripts", + "fastmultigather", + sigs, + sigs, + "-s", + "100", + "--moltype", + "protein", + "-k", + "19", + ) for qsig in sig_names: - g_output = runtmp.output(os.path.join(qsig + '.gather.csv')) - p_output = runtmp.output(os.path.join(qsig + '.prefetch.csv')) + g_output = runtmp.output(os.path.join(qsig + ".gather.csv")) + p_output = runtmp.output(os.path.join(qsig + ".prefetch.csv")) print(g_output) assert os.path.exists(g_output) assert os.path.exists(p_output) @@ -809,24 +1231,42 @@ def test_simple_protein(runtmp): df = pandas.read_csv(g_output) assert len(df) == 1 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp', 'gather_result_rank'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + "gather_result_rank", + }.issubset(keys) print(df) # since we're just matching to identical sigs, the md5s should be the same - assert df['query_md5'][0] == df['match_md5'][0] + assert df["query_md5"][0] == df["match_md5"][0] def test_simple_dayhoff(runtmp): # test basic protein execution - sigs = get_test_data('dayhoff.zip') + sigs = get_test_data("dayhoff.zip") sig_names = ["GCA_001593935", "GCA_001593925"] - runtmp.sourmash('scripts', 'fastmultigather', sigs, sigs, - '-s', '100', '--moltype', 'dayhoff', '-k', '19') + runtmp.sourmash( + "scripts", + "fastmultigather", + sigs, + sigs, + "-s", + "100", + "--moltype", + "dayhoff", + "-k", + "19", + ) for qsig in sig_names: - g_output = runtmp.output(os.path.join(qsig + '.gather.csv')) - p_output = runtmp.output(os.path.join(qsig + '.prefetch.csv')) + g_output = runtmp.output(os.path.join(qsig + ".gather.csv")) + p_output = runtmp.output(os.path.join(qsig + ".prefetch.csv")) print(g_output) assert os.path.exists(g_output) assert os.path.exists(p_output) @@ -834,24 +1274,42 @@ def test_simple_dayhoff(runtmp): df = pandas.read_csv(g_output) assert len(df) == 1 keys = set(df.keys()) - assert 
{'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp', 'gather_result_rank'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + "gather_result_rank", + }.issubset(keys) print(df) # since we're just matching to identical sigs, the md5s should be the same - assert df['query_md5'][0] == df['match_md5'][0] + assert df["query_md5"][0] == df["match_md5"][0] def test_simple_hp(runtmp): # test basic protein execution - sigs = get_test_data('hp.zip') + sigs = get_test_data("hp.zip") sig_names = ["GCA_001593935", "GCA_001593925"] - runtmp.sourmash('scripts', 'fastmultigather', sigs, sigs, - '-s', '100', '--moltype', 'hp', '-k', '19') + runtmp.sourmash( + "scripts", + "fastmultigather", + sigs, + sigs, + "-s", + "100", + "--moltype", + "hp", + "-k", + "19", + ) for qsig in sig_names: - g_output = runtmp.output(os.path.join(qsig + '.gather.csv')) - p_output = runtmp.output(os.path.join(qsig + '.prefetch.csv')) + g_output = runtmp.output(os.path.join(qsig + ".gather.csv")) + p_output = runtmp.output(os.path.join(qsig + ".prefetch.csv")) print(g_output) assert os.path.exists(g_output) assert os.path.exists(p_output) @@ -859,173 +1317,342 @@ def test_simple_hp(runtmp): df = pandas.read_csv(g_output) assert len(df) == 1 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp', 'gather_result_rank'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + "gather_result_rank", + }.issubset(keys) print(df) # since we're just matching to identical sigs, the md5s should be the same - assert df['query_md5'][0] == df['match_md5'][0] + assert df["query_md5"][0] == df["match_md5"][0] def test_simple_protein_indexed(runtmp): # test basic protein execution - sigs = get_test_data('protein.zip') - - sigs_db = index_siglist(runtmp, sigs, runtmp.output('db'), ksize=19, moltype='protein', scaled=100) - - out_csv = runtmp.output('out.csv') - runtmp.sourmash('scripts', 'fastmultigather', sigs, sigs_db, - '-s', '100', '--moltype', 'protein', '-k', '19', - '-o', out_csv) + sigs = get_test_data("protein.zip") + + sigs_db = index_siglist( + runtmp, sigs, runtmp.output("db"), ksize=19, moltype="protein", scaled=100 + ) + + out_csv = runtmp.output("out.csv") + runtmp.sourmash( + "scripts", + "fastmultigather", + sigs, + sigs_db, + "-s", + "100", + "--moltype", + "protein", + "-k", + "19", + "-o", + out_csv, + ) assert os.path.exists(out_csv) df = pandas.read_csv(out_csv) assert len(df) == 2 keys = set(df.keys()) - expected_keys = {'match_name', 'query_filename', 'query_n_hashes', 'match_filename', 'f_match_orig', - 'query_bp', 'query_abundance', 'match_containment_ani', 'intersect_bp', 'total_weighted_hashes', - 'n_unique_weighted_found', 'query_name', 'gather_result_rank', 'moltype', - 'query_containment_ani', 'sum_weighted_found', 'f_orig_query', 'ksize', 'max_containment_ani', - 'std_abund', 'scaled', 'average_containment_ani', 'f_match', 'f_unique_to_query', - 'average_abund', 'unique_intersect_bp', 'median_abund', 'query_md5', 'match_md5', 'remaining_bp', - 'f_unique_weighted'} + expected_keys = { + "match_name", + "query_filename", + "query_n_hashes", + "match_filename", + "f_match_orig", + "query_bp", + "query_abundance", + "match_containment_ani", + "intersect_bp", + "total_weighted_hashes", + "n_unique_weighted_found", + "query_name", + "gather_result_rank", + "moltype", + 
"query_containment_ani", + "sum_weighted_found", + "f_orig_query", + "ksize", + "max_containment_ani", + "std_abund", + "scaled", + "average_containment_ani", + "f_match", + "f_unique_to_query", + "average_abund", + "unique_intersect_bp", + "median_abund", + "query_md5", + "match_md5", + "remaining_bp", + "f_unique_weighted", + } assert keys == expected_keys print(df) # since we're just matching to identical sigs, the md5s should be the same - assert df['query_md5'][0] == df['match_md5'][0] - assert df['query_md5'][1] == df['match_md5'][1] + assert df["query_md5"][0] == df["match_md5"][0] + assert df["query_md5"][1] == df["match_md5"][1] def test_simple_dayhoff_indexed(runtmp): # test basic protein execution - sigs = get_test_data('dayhoff.zip') - - sigs_db = index_siglist(runtmp, sigs, runtmp.output('db'), ksize=19, moltype='dayhoff', scaled=100) - - out_csv = runtmp.output('out.csv') - runtmp.sourmash('scripts', 'fastmultigather', sigs, sigs_db, - '-s', '100', '--moltype', 'dayhoff', '-k', '19', - '-o', out_csv) + sigs = get_test_data("dayhoff.zip") + + sigs_db = index_siglist( + runtmp, sigs, runtmp.output("db"), ksize=19, moltype="dayhoff", scaled=100 + ) + + out_csv = runtmp.output("out.csv") + runtmp.sourmash( + "scripts", + "fastmultigather", + sigs, + sigs_db, + "-s", + "100", + "--moltype", + "dayhoff", + "-k", + "19", + "-o", + out_csv, + ) assert os.path.exists(out_csv) df = pandas.read_csv(out_csv) assert len(df) == 2 keys = set(df.keys()) - expected_keys = {'match_name', 'query_filename', 'query_n_hashes', 'match_filename', 'f_match_orig', - 'query_bp', 'query_abundance', 'match_containment_ani', 'intersect_bp', 'total_weighted_hashes', - 'n_unique_weighted_found', 'query_name', 'gather_result_rank', 'moltype', - 'query_containment_ani', 'sum_weighted_found', 'f_orig_query', 'ksize', 'max_containment_ani', - 'std_abund', 'scaled', 'average_containment_ani', 'f_match', 'f_unique_to_query', - 'average_abund', 'unique_intersect_bp', 'median_abund', 'query_md5', 'match_md5', 'remaining_bp', - 'f_unique_weighted'} + expected_keys = { + "match_name", + "query_filename", + "query_n_hashes", + "match_filename", + "f_match_orig", + "query_bp", + "query_abundance", + "match_containment_ani", + "intersect_bp", + "total_weighted_hashes", + "n_unique_weighted_found", + "query_name", + "gather_result_rank", + "moltype", + "query_containment_ani", + "sum_weighted_found", + "f_orig_query", + "ksize", + "max_containment_ani", + "std_abund", + "scaled", + "average_containment_ani", + "f_match", + "f_unique_to_query", + "average_abund", + "unique_intersect_bp", + "median_abund", + "query_md5", + "match_md5", + "remaining_bp", + "f_unique_weighted", + } assert keys == expected_keys print(df) # since we're just matching to identical sigs, the md5s should be the same - assert df['query_md5'][0] == df['match_md5'][0] - assert df['query_md5'][1] == df['match_md5'][1] + assert df["query_md5"][0] == df["match_md5"][0] + assert df["query_md5"][1] == df["match_md5"][1] def test_simple_hp_indexed(runtmp): # test basic protein execution - sigs = get_test_data('hp.zip') - - sigs_db = index_siglist(runtmp, sigs, runtmp.output('db'), ksize=19, moltype='hp', scaled=100) - - out_csv = runtmp.output('out.csv') - runtmp.sourmash('scripts', 'fastmultigather', sigs, sigs_db, - '-s', '100', '--moltype', 'hp', '-k', '19', - '-o', out_csv) + sigs = get_test_data("hp.zip") + + sigs_db = index_siglist( + runtmp, sigs, runtmp.output("db"), ksize=19, moltype="hp", scaled=100 + ) + + out_csv = runtmp.output("out.csv") + 
runtmp.sourmash( + "scripts", + "fastmultigather", + sigs, + sigs_db, + "-s", + "100", + "--moltype", + "hp", + "-k", + "19", + "-o", + out_csv, + ) assert os.path.exists(out_csv) df = pandas.read_csv(out_csv) assert len(df) == 2 keys = set(df.keys()) - expected_keys = {'match_name', 'query_filename', 'query_n_hashes', 'match_filename', 'f_match_orig', - 'query_bp', 'query_abundance', 'match_containment_ani', 'intersect_bp', 'total_weighted_hashes', - 'n_unique_weighted_found', 'query_name', 'gather_result_rank', 'moltype', - 'query_containment_ani', 'sum_weighted_found', 'f_orig_query', 'ksize', 'max_containment_ani', - 'std_abund', 'scaled', 'average_containment_ani', 'f_match', 'f_unique_to_query', - 'average_abund', 'unique_intersect_bp', 'median_abund', 'query_md5', 'match_md5', 'remaining_bp', - 'f_unique_weighted'} + expected_keys = { + "match_name", + "query_filename", + "query_n_hashes", + "match_filename", + "f_match_orig", + "query_bp", + "query_abundance", + "match_containment_ani", + "intersect_bp", + "total_weighted_hashes", + "n_unique_weighted_found", + "query_name", + "gather_result_rank", + "moltype", + "query_containment_ani", + "sum_weighted_found", + "f_orig_query", + "ksize", + "max_containment_ani", + "std_abund", + "scaled", + "average_containment_ani", + "f_match", + "f_unique_to_query", + "average_abund", + "unique_intersect_bp", + "median_abund", + "query_md5", + "match_md5", + "remaining_bp", + "f_unique_weighted", + } assert keys == expected_keys print(df) # since we're just matching to identical sigs, the md5s should be the same - assert df['query_md5'][0] == df['match_md5'][0] - assert df['query_md5'][1] == df['match_md5'][1] + assert df["query_md5"][0] == df["match_md5"][0] + assert df["query_md5"][1] == df["match_md5"][1] def test_indexed_full_output(runtmp): # test correct md5s present in output - query = get_test_data('SRR606249.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + query = get_test_data("SRR606249.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") make_file_list(query_list, [query]) make_file_list(against_list, [sig2, sig47, sig63]) - g_output = runtmp.output('out.csv') - against_db = index_siglist(runtmp, against_list, runtmp.output('rocksdb')) - runtmp.sourmash('scripts', 'fastmultigather', query_list, - against_db, '-s', '100000', '-t', '0', - '-o', g_output) + g_output = runtmp.output("out.csv") + against_db = index_siglist(runtmp, against_list, runtmp.output("rocksdb")) + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + against_db, + "-s", + "100000", + "-t", + "0", + "-o", + g_output, + ) # check full gather output assert os.path.exists(g_output) df = pandas.read_csv(g_output) assert len(df) == 3 keys = set(df.keys()) - expected_keys = {'match_name', 'query_filename', 'query_n_hashes', 'match_filename', 'f_match_orig', - 'query_bp', 'query_abundance', 'match_containment_ani', 'intersect_bp', 'total_weighted_hashes', - 'n_unique_weighted_found', 'query_name', 'gather_result_rank', 'moltype', - 'query_containment_ani', 'sum_weighted_found', 'f_orig_query', 'ksize', 'max_containment_ani', - 'std_abund', 'scaled', 'average_containment_ani', 'f_match', 'f_unique_to_query', - 'average_abund', 
'unique_intersect_bp', 'median_abund', 'query_md5', 'match_md5', 'remaining_bp', - 'f_unique_weighted'} + expected_keys = { + "match_name", + "query_filename", + "query_n_hashes", + "match_filename", + "f_match_orig", + "query_bp", + "query_abundance", + "match_containment_ani", + "intersect_bp", + "total_weighted_hashes", + "n_unique_weighted_found", + "query_name", + "gather_result_rank", + "moltype", + "query_containment_ani", + "sum_weighted_found", + "f_orig_query", + "ksize", + "max_containment_ani", + "std_abund", + "scaled", + "average_containment_ani", + "f_match", + "f_unique_to_query", + "average_abund", + "unique_intersect_bp", + "median_abund", + "query_md5", + "match_md5", + "remaining_bp", + "f_unique_weighted", + } assert keys == expected_keys results = df.values.tolist() # check a few columns - average_ani = set(df['average_containment_ani']) + average_ani = set(df["average_containment_ani"]) avg_ani = set([round(x, 4) for x in average_ani]) - assert avg_ani == {0.9221, 0.9306, 0.9316} # @CTB check against py gather + assert avg_ani == {0.9221, 0.9306, 0.9316} # @CTB check against py gather - f_unique_weighted = set(df['f_unique_weighted']) + f_unique_weighted = set(df["f_unique_weighted"]) f_unique_weighted = set([round(x, 4) for x in f_unique_weighted]) assert f_unique_weighted == {0.0063, 0.002, 0.0062} - unique_intersect_bp = set(df['unique_intersect_bp']) - unique_intersect_bp = set([round(x,4) for x in unique_intersect_bp]) + unique_intersect_bp = set(df["unique_intersect_bp"]) + unique_intersect_bp = set([round(x, 4) for x in unique_intersect_bp]) assert unique_intersect_bp == {4400000, 1800000, 2200000} def test_nonindexed_full_vs_sourmash_gather(runtmp): - query = get_test_data('SRR606249.sig.gz') + query = get_test_data("SRR606249.sig.gz") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") make_file_list(query_list, [query]) - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") make_file_list(against_list, [sig2, sig47, sig63]) - g_output = runtmp.output('SRR606249.gather.csv') - runtmp.sourmash('scripts', 'fastmultigather', query_list, - against_list, '-s', '100000', '-t', '0') + g_output = runtmp.output("SRR606249.gather.csv") + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + against_list, + "-s", + "100000", + "-t", + "0", + ) print(runtmp.last_result.out) print(runtmp.last_result.err) assert os.path.exists(g_output) # now run sourmash gather - sg_output = runtmp.output('.csv') - runtmp.sourmash('gather', query, against_list, - '-o', sg_output, '--scaled', '100000') + sg_output = runtmp.output(".csv") + runtmp.sourmash( + "gather", query, against_list, "-o", sg_output, "--scaled", "100000" + ) gather_df = pandas.read_csv(g_output) g_keys = set(gather_df.keys()) @@ -1034,162 +1661,208 @@ def test_nonindexed_full_vs_sourmash_gather(runtmp): sg_keys = set(sourmash_gather_df.keys()) print(sg_keys) modified_keys = ["match_md5", "match_name", "match_filename"] - sg_keys.update(modified_keys) # fastmultigather is more explicit (match_md5 instead of md5, etc) - print('g_keys - sg_keys:', g_keys - sg_keys) + sg_keys.update( + modified_keys + ) # fastmultigather is more explicit (match_md5 instead of md5, etc) + print("g_keys - sg_keys:", g_keys - 
sg_keys) assert not g_keys - sg_keys, g_keys - sg_keys for index, row in sourmash_gather_df.iterrows(): print(row.to_dict()) - fmg_intersect_bp = set(gather_df['intersect_bp']) - g_intersect_bp = set(sourmash_gather_df['intersect_bp']) + fmg_intersect_bp = set(gather_df["intersect_bp"]) + g_intersect_bp = set(sourmash_gather_df["intersect_bp"]) assert fmg_intersect_bp == g_intersect_bp == set([4400000, 4100000, 2200000]) - fmg_f_orig_query = set([round(x,4) for x in gather_df['f_orig_query']]) - g_f_orig_query = set([round(x,4) for x in sourmash_gather_df['f_orig_query']]) + fmg_f_orig_query = set([round(x, 4) for x in gather_df["f_orig_query"]]) + g_f_orig_query = set([round(x, 4) for x in sourmash_gather_df["f_orig_query"]]) assert fmg_f_orig_query == g_f_orig_query == set([0.0098, 0.0105, 0.0052]) - fmg_f_match = set([round(x,4) for x in gather_df['f_match']]) - g_f_match = set([round(x,4) for x in sourmash_gather_df['f_match']]) + fmg_f_match = set([round(x, 4) for x in gather_df["f_match"]]) + g_f_match = set([round(x, 4) for x in sourmash_gather_df["f_match"]]) assert fmg_f_match == g_f_match == set([0.439, 1.0]) - fmg_f_unique_to_query = set([round(x,3) for x in gather_df['f_unique_to_query']]) # rounding to 4 --> slightly different! - g_f_unique_to_query = set([round(x,3) for x in sourmash_gather_df['f_unique_to_query']]) + fmg_f_unique_to_query = set( + [round(x, 3) for x in gather_df["f_unique_to_query"]] + ) # rounding to 4 --> slightly different! + g_f_unique_to_query = set( + [round(x, 3) for x in sourmash_gather_df["f_unique_to_query"]] + ) assert fmg_f_unique_to_query == g_f_unique_to_query == set([0.004, 0.01, 0.005]) - fmg_f_unique_weighted = set([round(x,4) for x in gather_df['f_unique_weighted']]) - g_f_unique_weighted = set([round(x,4) for x in sourmash_gather_df['f_unique_weighted']]) - assert fmg_f_unique_weighted== g_f_unique_weighted == set([0.0063, 0.002, 0.0062]) - - fmg_average_abund = set([round(x,4) for x in gather_df['average_abund']]) - g_average_abund = set([round(x,4) for x in sourmash_gather_df['average_abund']]) - assert fmg_average_abund== g_average_abund == set([8.2222, 10.3864, 21.0455]) - - fmg_median_abund = set([round(x,4) for x in gather_df['median_abund']]) - g_median_abund = set([round(x,4) for x in sourmash_gather_df['median_abund']]) - assert fmg_median_abund== g_median_abund == set([8.0, 10.5, 21.5]) - - fmg_std_abund = set([round(x,4) for x in gather_df['std_abund']]) - g_std_abund = set([round(x,4) for x in sourmash_gather_df['std_abund']]) - assert fmg_std_abund== g_std_abund == set([3.172, 5.6446, 6.9322]) - - g_match_filename_basename = [os.path.basename(filename) for filename in sourmash_gather_df['filename']] - fmg_match_filename_basename = [os.path.basename(filename) for filename in gather_df['match_filename']] - assert all([x in fmg_match_filename_basename for x in ['2.fa.sig.gz', '63.fa.sig.gz', '47.fa.sig.gz']]) + fmg_f_unique_weighted = set([round(x, 4) for x in gather_df["f_unique_weighted"]]) + g_f_unique_weighted = set( + [round(x, 4) for x in sourmash_gather_df["f_unique_weighted"]] + ) + assert fmg_f_unique_weighted == g_f_unique_weighted == set([0.0063, 0.002, 0.0062]) + + fmg_average_abund = set([round(x, 4) for x in gather_df["average_abund"]]) + g_average_abund = set([round(x, 4) for x in sourmash_gather_df["average_abund"]]) + assert fmg_average_abund == g_average_abund == set([8.2222, 10.3864, 21.0455]) + + fmg_median_abund = set([round(x, 4) for x in gather_df["median_abund"]]) + g_median_abund = set([round(x, 4) for x 
in sourmash_gather_df["median_abund"]]) + assert fmg_median_abund == g_median_abund == set([8.0, 10.5, 21.5]) + + fmg_std_abund = set([round(x, 4) for x in gather_df["std_abund"]]) + g_std_abund = set([round(x, 4) for x in sourmash_gather_df["std_abund"]]) + assert fmg_std_abund == g_std_abund == set([3.172, 5.6446, 6.9322]) + + g_match_filename_basename = [ + os.path.basename(filename) for filename in sourmash_gather_df["filename"] + ] + fmg_match_filename_basename = [ + os.path.basename(filename) for filename in gather_df["match_filename"] + ] + assert all( + [ + x in fmg_match_filename_basename + for x in ["2.fa.sig.gz", "63.fa.sig.gz", "47.fa.sig.gz"] + ] + ) assert fmg_match_filename_basename == g_match_filename_basename - assert list(sourmash_gather_df['name']) == list(gather_df['match_name']) - assert list(sourmash_gather_df['md5']) == list(gather_df['match_md5']) + assert list(sourmash_gather_df["name"]) == list(gather_df["match_name"]) + assert list(sourmash_gather_df["md5"]) == list(gather_df["match_md5"]) - fmg_f_match_orig = set([round(x,4) for x in gather_df['f_match_orig']]) - g_f_match_orig = set([round(x,4) for x in sourmash_gather_df['f_match_orig']]) + fmg_f_match_orig = set([round(x, 4) for x in gather_df["f_match_orig"]]) + g_f_match_orig = set([round(x, 4) for x in sourmash_gather_df["f_match_orig"]]) assert fmg_f_match_orig == g_f_match_orig == set([1.0]) - fmg_unique_intersect_bp = set(gather_df['unique_intersect_bp']) - g_unique_intersect_bp = set(sourmash_gather_df['unique_intersect_bp']) - assert fmg_unique_intersect_bp == g_unique_intersect_bp == set([4400000, 1800000, 2200000]) + fmg_unique_intersect_bp = set(gather_df["unique_intersect_bp"]) + g_unique_intersect_bp = set(sourmash_gather_df["unique_intersect_bp"]) + assert ( + fmg_unique_intersect_bp + == g_unique_intersect_bp + == set([4400000, 1800000, 2200000]) + ) - fmg_gather_result_rank= set(gather_df['gather_result_rank']) - g_gather_result_rank = set(sourmash_gather_df['gather_result_rank']) - assert fmg_gather_result_rank == g_gather_result_rank == set([0,1,2]) + fmg_gather_result_rank = set(gather_df["gather_result_rank"]) + g_gather_result_rank = set(sourmash_gather_df["gather_result_rank"]) + assert fmg_gather_result_rank == g_gather_result_rank == set([0, 1, 2]) - fmg_remaining_bp = list(gather_df['remaining_bp']) + fmg_remaining_bp = list(gather_df["remaining_bp"]) assert fmg_remaining_bp == [415600000, 413400000, 411600000] ### Gather remaining bp does not match, but I think this one is right? 
- #g_remaining_bp = list(sourmash_gather_df['remaining_bp']) - #print("gather remaining bp: ", g_remaining_bp) #{4000000, 0, 1800000} + # g_remaining_bp = list(sourmash_gather_df['remaining_bp']) + # print("gather remaining bp: ", g_remaining_bp) #{4000000, 0, 1800000} # assert fmg_remaining_bp == g_remaining_bp == set([]) - fmg_query_containment_ani = set([round(x,4) for x in gather_df['query_containment_ani']]) - g_query_containment_ani = set([round(x,4) for x in sourmash_gather_df['query_containment_ani']]) + fmg_query_containment_ani = set( + [round(x, 4) for x in gather_df["query_containment_ani"]] + ) + g_query_containment_ani = set( + [round(x, 4) for x in sourmash_gather_df["query_containment_ani"]] + ) assert fmg_query_containment_ani == {0.8442, 0.8613, 0.8632} # gather cANI are nans here -- perhaps b/c sketches too small # assert fmg_query_containment_ani == g_query_containment_ani == set([0.8632, 0.8444, 0.8391]) print("fmg qcANI: ", fmg_query_containment_ani) print("g_qcANI: ", g_query_containment_ani) - fmg_n_unique_weighted_found= set(gather_df['n_unique_weighted_found']) - g_n_unique_weighted_found = set(sourmash_gather_df['n_unique_weighted_found']) - assert fmg_n_unique_weighted_found == g_n_unique_weighted_found == set([457, 148, 463]) + fmg_n_unique_weighted_found = set(gather_df["n_unique_weighted_found"]) + g_n_unique_weighted_found = set(sourmash_gather_df["n_unique_weighted_found"]) + assert ( + fmg_n_unique_weighted_found == g_n_unique_weighted_found == set([457, 148, 463]) + ) - fmg_sum_weighted_found= set(gather_df['sum_weighted_found']) - g_sum_weighted_found = set(sourmash_gather_df['sum_weighted_found']) + fmg_sum_weighted_found = set(gather_df["sum_weighted_found"]) + g_sum_weighted_found = set(sourmash_gather_df["sum_weighted_found"]) assert fmg_sum_weighted_found == g_sum_weighted_found == set([920, 457, 1068]) - fmg_total_weighted_hashes= set(gather_df['total_weighted_hashes']) - g_total_weighted_hashes = set(sourmash_gather_df['total_weighted_hashes']) + fmg_total_weighted_hashes = set(gather_df["total_weighted_hashes"]) + g_total_weighted_hashes = set(sourmash_gather_df["total_weighted_hashes"]) assert fmg_total_weighted_hashes == g_total_weighted_hashes == set([73489]) def test_rocksdb_gather_against_index_with_sigs(runtmp, capfd): # fastmultigather should succeed if indexed sigs are stored internally. - query = get_test_data('SRR606249.sig.gz') + query = get_test_data("SRR606249.sig.gz") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') - shutil.copyfile(sig2, runtmp.output('2.fa.sig.gz')) - shutil.copyfile(sig47, runtmp.output('47.fa.sig.gz')) - shutil.copyfile(sig63, runtmp.output('63.fa.sig.gz')) + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") + shutil.copyfile(sig2, runtmp.output("2.fa.sig.gz")) + shutil.copyfile(sig47, runtmp.output("47.fa.sig.gz")) + shutil.copyfile(sig63, runtmp.output("63.fa.sig.gz")) - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") make_file_list(query_list, [query]) - against_list = runtmp.output('against.txt') - make_file_list(against_list, ["2.fa.sig.gz", - "47.fa.sig.gz", - "63.fa.sig.gz"]) + against_list = runtmp.output("against.txt") + make_file_list(against_list, ["2.fa.sig.gz", "47.fa.sig.gz", "63.fa.sig.gz"]) # index! 
note: '--internal-storage' defaults to True - runtmp.sourmash('scripts', 'index', against_list, - '-o', 'subdir/against.rocksdb') + runtmp.sourmash("scripts", "index", against_list, "-o", "subdir/against.rocksdb") # remove the external storage out from under the rocksdb. - os.unlink(runtmp.output('2.fa.sig.gz')) - os.unlink(runtmp.output('47.fa.sig.gz')) - os.unlink(runtmp.output('63.fa.sig.gz')) - - g_output = runtmp.output('zzz.csv') - - runtmp.sourmash('scripts', 'fastmultigather', query_list, - 'subdir/against.rocksdb', '-s', '100000', '-t', '0', - '-o', g_output, - in_location=runtmp.output('')) + os.unlink(runtmp.output("2.fa.sig.gz")) + os.unlink(runtmp.output("47.fa.sig.gz")) + os.unlink(runtmp.output("63.fa.sig.gz")) + + g_output = runtmp.output("zzz.csv") + + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + "subdir/against.rocksdb", + "-s", + "100000", + "-t", + "0", + "-o", + g_output, + in_location=runtmp.output(""), + ) def test_rocksdb_no_internal_storage_gather_fails(runtmp, capfd): # force gather to fail b/c we make an index with no internal sketches - query = get_test_data('SRR606249.sig.gz') + query = get_test_data("SRR606249.sig.gz") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') - shutil.copyfile(sig2, runtmp.output('2.fa.sig.gz')) - shutil.copyfile(sig47, runtmp.output('47.fa.sig.gz')) - shutil.copyfile(sig63, runtmp.output('63.fa.sig.gz')) + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") + shutil.copyfile(sig2, runtmp.output("2.fa.sig.gz")) + shutil.copyfile(sig47, runtmp.output("47.fa.sig.gz")) + shutil.copyfile(sig63, runtmp.output("63.fa.sig.gz")) - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") make_file_list(query_list, [query]) - against_list = runtmp.output('against.txt') - make_file_list(against_list, ["2.fa.sig.gz", - "47.fa.sig.gz", - "63.fa.sig.gz"]) - - runtmp.sourmash('scripts', 'index', against_list, '--no-internal-storage', - '-o', 'subdir/against.rocksdb') + against_list = runtmp.output("against.txt") + make_file_list(against_list, ["2.fa.sig.gz", "47.fa.sig.gz", "63.fa.sig.gz"]) + + runtmp.sourmash( + "scripts", + "index", + against_list, + "--no-internal-storage", + "-o", + "subdir/against.rocksdb", + ) # remove the external storage out from under the rocksdb. # this will make gather fail. - os.unlink(runtmp.output('2.fa.sig.gz')) - os.unlink(runtmp.output('47.fa.sig.gz')) - os.unlink(runtmp.output('63.fa.sig.gz')) + os.unlink(runtmp.output("2.fa.sig.gz")) + os.unlink(runtmp.output("47.fa.sig.gz")) + os.unlink(runtmp.output("63.fa.sig.gz")) - g_output = runtmp.output('zzz.csv') + g_output = runtmp.output("zzz.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'fastmultigather', query_list, - 'subdir/against.rocksdb', '-s', '100000', '-t', '0', - '-o', g_output, - in_location=runtmp.output('')) + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + "subdir/against.rocksdb", + "-s", + "100000", + "-t", + "0", + "-o", + g_output, + in_location=runtmp.output(""), + ) print(runtmp.last_result.out) print(runtmp.last_result.err) @@ -1199,51 +1872,76 @@ def test_rocksdb_no_internal_storage_gather_fails(runtmp, capfd): assert "Error gathering matches:" in captured.err assert "ERROR: 1 failed gathers. See error messages above." in captured.err - assert "Unresolvable errors found; results cannot be trusted. Quitting." 
in captured.err - + assert ( + "Unresolvable errors found; results cannot be trusted. Quitting." + in captured.err + ) def test_save_matches(runtmp): # test basic execution! - query = get_test_data('SRR606249.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + query = get_test_data("SRR606249.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") make_file_list(query_list, [query]) make_file_list(against_list, [sig2, sig47, sig63]) - - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, - '-s', '100000', '-t', '0', '--save-matches', - in_directory=runtmp.output('')) - - print(os.listdir(runtmp.output(''))) - - g_output = runtmp.output('SRR606249.gather.csv') - p_output = runtmp.output('SRR606249.prefetch.csv') - m_output = runtmp.output('SRR606249.matches.sig') + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + against_list, + "-s", + "100000", + "-t", + "0", + "--save-matches", + in_directory=runtmp.output(""), + ) + + print(os.listdir(runtmp.output(""))) + + g_output = runtmp.output("SRR606249.gather.csv") + p_output = runtmp.output("SRR606249.prefetch.csv") + m_output = runtmp.output("SRR606249.matches.sig") assert os.path.exists(g_output) assert os.path.exists(p_output) assert os.path.exists(m_output) - + # check prefetch output (only non-indexed gather) df = pandas.read_csv(p_output) - + assert len(df) == 3 keys = set(df.keys()) - assert keys == {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'} + assert keys == { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + } assert os.path.exists(g_output) df = pandas.read_csv(g_output) assert len(df) == 3 keys = set(df.keys()) - assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp', 'gather_result_rank'}.issubset(keys) + assert { + "query_filename", + "query_name", + "query_md5", + "match_name", + "match_md5", + "intersect_bp", + "gather_result_rank", + }.issubset(keys) # can't test against prefetch because matched k-mers can overlap match_ss = list(sourmash.load_file_as_signatures(m_output, ksize=31))[0] @@ -1251,7 +1949,7 @@ def test_save_matches(runtmp): matches_sig_len = len(match_mh) # right size? - assert sum(df['intersect_bp']) >= matches_sig_len * 100_000 + assert sum(df["intersect_bp"]) >= matches_sig_len * 100_000 # containment? 
mg_ss = list(sourmash.load_file_as_signatures(query, ksize=31))[0] @@ -1261,21 +1959,31 @@ def test_save_matches(runtmp): def test_create_empty_results(runtmp): # sig2 has 0 hashes in common with 47 and 63 - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") make_file_list(query_list, [sig2]) make_file_list(against_list, [sig47, sig63]) - runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, - '-s', '100000', '-t', '0', '--create-empty-results', in_directory=runtmp.output('')) - - print(os.listdir(runtmp.output(''))) - - g_output = runtmp.output('CP001071.1.gather.csv') - p_output = runtmp.output('CP001071.1.prefetch.csv') + runtmp.sourmash( + "scripts", + "fastmultigather", + query_list, + against_list, + "-s", + "100000", + "-t", + "0", + "--create-empty-results", + in_directory=runtmp.output(""), + ) + + print(os.listdir(runtmp.output(""))) + + g_output = runtmp.output("CP001071.1.gather.csv") + p_output = runtmp.output("CP001071.1.prefetch.csv") assert os.path.exists(p_output) diff --git a/src/python/tests/test_index.py b/src/python/tests/test_index.py index 105c1cb2..9e8a1d4a 100644 --- a/src/python/tests/test_index.py +++ b/src/python/tests/test_index.py @@ -5,75 +5,77 @@ import shutil from . import sourmash_tst_utils as utils -from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist) +from .sourmash_tst_utils import get_test_data, make_file_list, zip_siglist def test_installed(runtmp): with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'index') + runtmp.sourmash("scripts", "index") - assert 'usage: index' in runtmp.last_result.err + assert "usage: index" in runtmp.last_result.err def test_index(runtmp, toggle_internal_storage): # test basic index! - siglist = runtmp.output('db-sigs.txt') + siglist = runtmp.output("db-sigs.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(siglist, [sig2, sig47, sig63]) - output = runtmp.output('db.rocksdb') + output = runtmp.output("db.rocksdb") - runtmp.sourmash('scripts', 'index', siglist, - '-o', output, toggle_internal_storage) + runtmp.sourmash("scripts", "index", siglist, "-o", output, toggle_internal_storage) assert os.path.exists(output) print(runtmp.last_result.err) - assert 'index is done' in runtmp.last_result.err + assert "index is done" in runtmp.last_result.err def test_index_warning_message(runtmp, capfd): # test basic index when it has to load things into memory - see #451. - siglist = runtmp.output('db-sigs.txt') + siglist = runtmp.output("db-sigs.txt") # note: can't use zip w/o breaking index. See sourmash-bio/sourmash#3321. 
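    # (what the assertions below pin down: with a zip collection in the
    # pathlist, the indexer falls back to loading every sketch into memory
    # and emits the WARNING that the capfd check looks for.)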
-    sig2 = get_test_data('2.sig.zip')
-    sig47 = get_test_data('47.fa.sig.gz')
-    sig63 = get_test_data('63.fa.sig.gz')
+    sig2 = get_test_data("2.sig.zip")
+    sig47 = get_test_data("47.fa.sig.gz")
+    sig63 = get_test_data("63.fa.sig.gz")
    make_file_list(siglist, [sig2, sig47, sig63])

-    output = runtmp.output('db.rocksdb')
+    output = runtmp.output("db.rocksdb")

-    runtmp.sourmash('scripts', 'index', siglist, '-o', output)
+    runtmp.sourmash("scripts", "index", siglist, "-o", output)
    assert os.path.exists(output)
    print(runtmp.last_result.err)

-    assert 'index is done' in runtmp.last_result.err
+    assert "index is done" in runtmp.last_result.err
    captured = capfd.readouterr()
    print(captured.err)
-    assert "WARNING: loading all sketches into memory in order to index." in captured.err
+    assert (
+        "WARNING: loading all sketches into memory in order to index." in captured.err
+    )


def test_index_error_message(runtmp, capfd):
    # test basic index when it errors out b/c can't load
-    siglist = runtmp.output('db-sigs.txt')
+    siglist = runtmp.output("db-sigs.txt")

    # note: can't use zip w/o breaking index. See sourmash-bio/sourmash#3321.
-    sig2 = get_test_data('2.sig.zip')
-    sig47 = get_test_data('47.fa.sig.gz')
-    sig63 = get_test_data('63.fa.sig.gz')
+    sig2 = get_test_data("2.sig.zip")
+    sig47 = get_test_data("47.fa.sig.gz")
+    sig63 = get_test_data("63.fa.sig.gz")
    make_file_list(siglist, [sig2, sig47, sig63])

-    output = runtmp.output('db.rocksdb')
+    output = runtmp.output("db.rocksdb")

    with pytest.raises(utils.SourmashCommandFailed):
-        runtmp.sourmash('scripts', 'index', siglist, '-o', output,
-                        '--no-internal-storage')
+        runtmp.sourmash(
+            "scripts", "index", siglist, "-o", output, "--no-internal-storage"
+        )

    captured = capfd.readouterr()
    print(captured.err)
@@ -84,127 +86,160 @@ def test_index_recursive(runtmp, capfd):
    # test index of pathlist containing standalone manifest containing zip.
    # a little ridiculous, but should hit the various branches in
    # MultiCollection::load
-    siglist = runtmp.output('db-sigs.txt')
+    siglist = runtmp.output("db-sigs.txt")

    # our basic list of sketches...
-    sig2_zip = get_test_data('2.sig.zip')
-    sig47 = get_test_data('47.fa.sig.gz')
-    sig63 = get_test_data('63.fa.sig.gz')
+    sig2_zip = get_test_data("2.sig.zip")
+    sig47 = get_test_data("47.fa.sig.gz")
+    sig63 = get_test_data("63.fa.sig.gz")

-    # generate a standalone mf containing a sip
-    standalone_mf = runtmp.output('stand-mf.csv')
-    runtmp.sourmash('sig', 'collect', '-F', 'csv', '-o', standalone_mf,
-                    sig2_zip)
+    # generate a standalone mf containing a zip
+    standalone_mf = runtmp.output("stand-mf.csv")
+    runtmp.sourmash("sig", "collect", "-F", "csv", "-o", standalone_mf, sig2_zip)

    # now make a file list containing that mf
    make_file_list(siglist, [standalone_mf, sig47, sig63])

-    output = runtmp.output('db.rocksdb')
+    output = runtmp.output("db.rocksdb")

-    runtmp.sourmash('scripts', 'index', siglist, '-o', output)
+    runtmp.sourmash("scripts", "index", siglist, "-o", output)
    captured = capfd.readouterr()
    print(captured.err)

-    assert "WARNING: loading all sketches into memory in order to index." in captured.err
-    assert 'index is done' in runtmp.last_result.err
-    assert 'Indexing 3 sketches.' in captured.err
+    assert (
+        "WARNING: loading all sketches into memory in order to index." in captured.err
+    )
+    assert "index is done" in runtmp.last_result.err
+    assert "Indexing 3 sketches." in captured.err


def test_index_protein(runtmp, toggle_internal_storage):
-    sigs = get_test_data('protein.zip')
-    output = runtmp.output('db.rocksdb')
-
-    runtmp.sourmash('scripts', 'index', sigs, '-k', '19', '-s', '100',
-                    '--moltype', 'protein', '-o', output,
-                    toggle_internal_storage)
+    sigs = get_test_data("protein.zip")
+    output = runtmp.output("db.rocksdb")
+
+    runtmp.sourmash(
+        "scripts",
+        "index",
+        sigs,
+        "-k",
+        "19",
+        "-s",
+        "100",
+        "--moltype",
+        "protein",
+        "-o",
+        output,
+        toggle_internal_storage,
+    )
    assert os.path.exists(output)

    print(runtmp.last_result.err)
-    assert 'index is done' in runtmp.last_result.err
+    assert "index is done" in runtmp.last_result.err


def test_index_dayhoff(runtmp, toggle_internal_storage):
-    sigs = get_test_data('dayhoff.zip')
-    output = runtmp.output('db.rocksdb')
-
-    runtmp.sourmash('scripts', 'index', sigs, '-k', '19', '-s', '100',
-                    '--moltype', 'dayhoff', '-o', output,
-                    toggle_internal_storage)
+    sigs = get_test_data("dayhoff.zip")
+    output = runtmp.output("db.rocksdb")
+
+    runtmp.sourmash(
+        "scripts",
+        "index",
+        sigs,
+        "-k",
+        "19",
+        "-s",
+        "100",
+        "--moltype",
+        "dayhoff",
+        "-o",
+        output,
+        toggle_internal_storage,
+    )
    assert os.path.exists(output)

    print(runtmp.last_result.err)
-    assert 'index is done' in runtmp.last_result.err
+    assert "index is done" in runtmp.last_result.err


-def test_index_protein(runtmp, toggle_internal_storage):
-    sigs = get_test_data('hp.zip')
-    output = runtmp.output('db.rocksdb')
-
-    runtmp.sourmash('scripts', 'index', sigs, '-k', '19', '-s', '100',
-                    '--moltype', 'hp', '-o', output, toggle_internal_storage)
+def test_index_hp(runtmp, toggle_internal_storage):
+    sigs = get_test_data("hp.zip")
+    output = runtmp.output("db.rocksdb")
+
+    runtmp.sourmash(
+        "scripts",
+        "index",
+        sigs,
+        "-k",
+        "19",
+        "-s",
+        "100",
+        "--moltype",
+        "hp",
+        "-o",
+        output,
+        toggle_internal_storage,
+    )
    assert os.path.exists(output)

    print(runtmp.last_result.err)
-    assert 'index is done' in runtmp.last_result.err
+    assert "index is done" in runtmp.last_result.err


def test_index_missing_siglist(runtmp, capfd, toggle_internal_storage):
    # test missing siglist file
-    siglist = runtmp.output('db-sigs.txt')
-    output = runtmp.output('out.db')
+    siglist = runtmp.output("db-sigs.txt")
+    output = runtmp.output("out.db")
    # make_file_list(siglist, []) # don't make siglist file

    with pytest.raises(utils.SourmashCommandFailed):
-        runtmp.sourmash('scripts', 'index', siglist,
-                        '-o', output, toggle_internal_storage)
+        runtmp.sourmash(
+            "scripts", "index", siglist, "-o", output, toggle_internal_storage
+        )

    captured = capfd.readouterr()
    print(captured.err)
-    assert 'Error: No such file or directory: ' in captured.err
+    assert "Error: No such file or directory: " in captured.err


def test_index_sig(runtmp, capfd, toggle_internal_storage):
    # test index with a .sig.gz file instead of pathlist
    # (should work now)
-    sig2 = get_test_data('2.fa.sig.gz')
-    output = runtmp.output('out.db')
+    sig2 = get_test_data("2.fa.sig.gz")
+    output = runtmp.output("out.db")

-    runtmp.sourmash('scripts', 'index', sig2,
-                    '-o', output, toggle_internal_storage)
+    runtmp.sourmash("scripts", "index", sig2, "-o", output, toggle_internal_storage)

    captured = capfd.readouterr()
    print(captured.err)
    print(runtmp.last_result.err)
-    assert 'index is done' in runtmp.last_result.err
+    assert "index is done" in runtmp.last_result.err


def test_index_manifest(runtmp, capfd, toggle_internal_storage):
    # test index with a manifest file
-    sig2 = get_test_data('2.fa.sig.gz')
-    output = runtmp.output('out.db')
-    sig_mf = 
runtmp.output('mf.csv') + sig2 = get_test_data("2.fa.sig.gz") + output = runtmp.output("out.db") + sig_mf = runtmp.output("mf.csv") runtmp.sourmash("sig", "manifest", sig2, "-o", sig_mf) - runtmp.sourmash('scripts', 'index', sig_mf, - '-o', output, toggle_internal_storage) + runtmp.sourmash("scripts", "index", sig_mf, "-o", output, toggle_internal_storage) captured = capfd.readouterr() print(captured.err) print(runtmp.last_result.err) - assert 'index is done' in runtmp.last_result.err + assert "index is done" in runtmp.last_result.err def test_index_bad_siglist_2(runtmp, capfd): # test with a bad siglist (containing a missing file) - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, "no-exist"]) - db = runtmp.output('db.rocksdb') + db = runtmp.output("db.rocksdb") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'index', against_list, - '-o', db) + runtmp.sourmash("scripts", "index", against_list, "-o", db) captured = capfd.readouterr() print(captured.err) @@ -213,16 +248,15 @@ def test_index_bad_siglist_2(runtmp, capfd): def test_index_empty_siglist(runtmp, capfd): # test empty siglist file - siglist = runtmp.output('db-sigs.txt') - output = runtmp.output('out.db') - make_file_list(siglist, []) # empty + siglist = runtmp.output("db-sigs.txt") + output = runtmp.output("out.db") + make_file_list(siglist, []) # empty with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'index', siglist, - '-o', output) + runtmp.sourmash("scripts", "index", siglist, "-o", output) captured = capfd.readouterr() - assert not os.path.exists(output) # do we want an empty file, or no file? + assert not os.path.exists(output) # do we want an empty file, or no file? 
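    # note that make_file_list(siglist, []) still creates the pathlist file,
    # so the failure above comes from the indexer finding zero sketches, not
    # from a missing path list. For reference, a plausible minimal sketch of
    # the helper (the real one lives in .sourmash_tst_utils and may differ):
    #
    #     def make_file_list(filename, paths):
    #         # write one path per line, newline-terminated
    #         with open(filename, "wt") as fp:
    #             fp.write("\n".join(paths))
    #             fp.write("\n")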
print(runtmp.last_result.out) print(runtmp.last_result.err) print(captured.err) @@ -231,15 +265,14 @@ def test_index_empty_siglist(runtmp, capfd): def test_index_nomatch(runtmp, capfd): # test index with a siglist file that has (only) a non-matching ksize sig - siglist = runtmp.output('against.txt') - db = runtmp.output('db.rocksdb') + siglist = runtmp.output("against.txt") + db = runtmp.output("db.rocksdb") - sig1 = get_test_data('1.fa.k21.sig.gz') + sig1 = get_test_data("1.fa.k21.sig.gz") make_file_list(siglist, [sig1]) with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'index', siglist, - '-o', db) + runtmp.sourmash("scripts", "index", siglist, "-o", db) captured = capfd.readouterr() print(runtmp.last_result.out) @@ -251,15 +284,14 @@ def test_index_nomatch(runtmp, capfd): def test_index_nomatch_sig_in_siglist(runtmp, capfd): # test index with a siglist file that has both matching and non-matching sigs - siglist = runtmp.output('against.txt') - db = runtmp.output('db.rocksdb') + siglist = runtmp.output("against.txt") + db = runtmp.output("db.rocksdb") - sig2 = get_test_data('2.fa.sig.gz') - sig1 = get_test_data('1.fa.k21.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig1 = get_test_data("1.fa.k21.sig.gz") make_file_list(siglist, [sig2, sig1]) - runtmp.sourmash('scripts', 'index', siglist, - '-o', db) + runtmp.sourmash("scripts", "index", siglist, "-o", db) captured = capfd.readouterr() print(runtmp.last_result.out) @@ -271,26 +303,25 @@ def test_index_nomatch_sig_in_siglist(runtmp, capfd): def test_index_zipfile(runtmp, capfd, toggle_internal_storage): # test basic index from sourmash zipfile - siglist = runtmp.output('db-sigs.txt') + siglist = runtmp.output("db-sigs.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(siglist, [sig2, sig47, sig63]) - zipf = runtmp.output('sigs.zip') + zipf = runtmp.output("sigs.zip") - runtmp.sourmash('sig', 'cat', siglist, '-o', zipf) + runtmp.sourmash("sig", "cat", siglist, "-o", zipf) - output = runtmp.output('db.rocksdb') + output = runtmp.output("db.rocksdb") - runtmp.sourmash('scripts', 'index', zipf, - '-o', output, toggle_internal_storage) + runtmp.sourmash("scripts", "index", zipf, "-o", output, toggle_internal_storage) assert os.path.exists(output) print(runtmp.last_result.err) - assert 'index is done' in runtmp.last_result.err + assert "index is done" in runtmp.last_result.err captured = capfd.readouterr() print(captured.err) @@ -303,113 +334,119 @@ def test_index_zipfile_subdir(runtmp, capfd, toggle_internal_storage): # * use non-abspath to zip file for indexing # so that the relative path gets things wrong. 
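    # i.e. the zip is created via a relative path, the index is written into
    # subdir/, and the two 'check' invocations at the end of this test (one
    # run from subdir/, one from the top level) confirm that the stored path
    # resolves from either working directory.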
- siglist = runtmp.output('db-sigs.txt') + siglist = runtmp.output("db-sigs.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - shutil.copyfile(sig2, runtmp.output('2.fa.sig.gz')) - shutil.copyfile(sig47, runtmp.output('47.fa.sig.gz')) - shutil.copyfile(sig63, runtmp.output('63.fa.sig.gz')) + shutil.copyfile(sig2, runtmp.output("2.fa.sig.gz")) + shutil.copyfile(sig47, runtmp.output("47.fa.sig.gz")) + shutil.copyfile(sig63, runtmp.output("63.fa.sig.gz")) - os.mkdir(runtmp.output('subdir')) + os.mkdir(runtmp.output("subdir")) - zipf = 'sigs.zip' + zipf = "sigs.zip" - runtmp.sourmash('sig', 'cat', '2.fa.sig.gz', '47.fa.sig.gz', - '63.fa.sig.gz', '-o', zipf) + runtmp.sourmash( + "sig", "cat", "2.fa.sig.gz", "47.fa.sig.gz", "63.fa.sig.gz", "-o", zipf + ) - output = runtmp.output('subdir/db.rocksdb') + output = runtmp.output("subdir/db.rocksdb") - runtmp.sourmash('scripts', 'index', zipf, - '-o', output, in_directory=runtmp.output(''), - toggle_internal_storage=toggle_internal_storage) + runtmp.sourmash( + "scripts", + "index", + zipf, + "-o", + output, + in_directory=runtmp.output(""), + toggle_internal_storage=toggle_internal_storage, + ) assert os.path.exists(output) print(runtmp.last_result.err) - assert 'index is done' in runtmp.last_result.err + assert "index is done" in runtmp.last_result.err captured = capfd.readouterr() print(captured.err) - runtmp.sourmash('scripts', 'check', 'db.rocksdb', - in_directory=runtmp.output('subdir')) - runtmp.sourmash('scripts', 'check', 'subdir/db.rocksdb', - in_directory=runtmp.output('')) + runtmp.sourmash( + "scripts", "check", "db.rocksdb", in_directory=runtmp.output("subdir") + ) + runtmp.sourmash( + "scripts", "check", "subdir/db.rocksdb", in_directory=runtmp.output("") + ) def test_index_zipfile_repeated_md5sums(runtmp, capfd, toggle_internal_storage): # test that we're reading all files, including repeated md5sums - siglist = runtmp.output('db-sigs.txt') + siglist = runtmp.output("db-sigs.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig2a = runtmp.output('sig2a.sig.gz') - sig2b = runtmp.output('sig2b.sig.gz') - runtmp.sourmash('sig', 'rename', sig2, 'name2', '-o', sig2a) - runtmp.sourmash('sig', 'rename', sig2, 'name3', '-o', sig2b) + sig2 = get_test_data("2.fa.sig.gz") + sig2a = runtmp.output("sig2a.sig.gz") + sig2b = runtmp.output("sig2b.sig.gz") + runtmp.sourmash("sig", "rename", sig2, "name2", "-o", sig2a) + runtmp.sourmash("sig", "rename", sig2, "name3", "-o", sig2b) make_file_list(siglist, [sig2, sig2a, sig2b]) - zipf = runtmp.output('sigs.zip') - runtmp.sourmash('sig', 'cat', siglist, '-o', zipf) + zipf = runtmp.output("sigs.zip") + runtmp.sourmash("sig", "cat", siglist, "-o", zipf) - output = runtmp.output('db.rocksdb') + output = runtmp.output("db.rocksdb") - runtmp.sourmash('scripts', 'index', zipf, - '-o', output, toggle_internal_storage) + runtmp.sourmash("scripts", "index", zipf, "-o", output, toggle_internal_storage) assert os.path.exists(output) print(runtmp.last_result.err) captured = capfd.readouterr() print(captured.err) - assert 'index is done' in runtmp.last_result.err + assert "index is done" in runtmp.last_result.err def test_index_zipfile_multiparam(runtmp, capfd, toggle_internal_storage): # test index from sourmash zipfile with multiple ksizes / scaled /moltype - siglist = runtmp.output('db-sigs.txt') + siglist = 
runtmp.output("db-sigs.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') - sig1 = get_test_data('1.combined.sig.gz') - srr = get_test_data('SRR606249.sig.gz') - prot = get_test_data('protein.zip') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") + sig1 = get_test_data("1.combined.sig.gz") + srr = get_test_data("SRR606249.sig.gz") + prot = get_test_data("protein.zip") make_file_list(siglist, [sig2, sig47, sig63, sig1, srr, prot]) - zipf = runtmp.output('sigs.zip') + zipf = runtmp.output("sigs.zip") - runtmp.sourmash('sig', 'cat', siglist, '-o', zipf) + runtmp.sourmash("sig", "cat", siglist, "-o", zipf) - output = runtmp.output('db.rocksdb') + output = runtmp.output("db.rocksdb") - runtmp.sourmash('scripts', 'index', zipf, - '-o', output, toggle_internal_storage) + runtmp.sourmash("scripts", "index", zipf, "-o", output, toggle_internal_storage) assert os.path.exists(output) print(runtmp.last_result.err) - assert 'index is done' in runtmp.last_result.err + assert "index is done" in runtmp.last_result.err captured = capfd.readouterr() print(captured.err) def test_index_zipfile_bad(runtmp, capfd): # test with a bad input zipfile (a .sig.gz file renamed as zip file) - sig2 = get_test_data('2.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") - query_zip = runtmp.output('query.zip') + query_zip = runtmp.output("query.zip") # cp sig2 into query_zip - with open(query_zip, 'wb') as fp: - with open(sig2, 'rb') as fp2: + with open(query_zip, "wb") as fp: + with open(sig2, "rb") as fp2: fp.write(fp2.read()) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'index', query_zip, - '-o', output) + runtmp.sourmash("scripts", "index", query_zip, "-o", output) captured = capfd.readouterr() print(captured.err) @@ -419,60 +456,57 @@ def test_index_zipfile_bad(runtmp, capfd): def test_index_check(runtmp, toggle_internal_storage): # test check index - siglist = runtmp.output('db-sigs.txt') + siglist = runtmp.output("db-sigs.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") make_file_list(siglist, [sig2, sig47]) - output = runtmp.output('db.rocksdb') + output = runtmp.output("db.rocksdb") - runtmp.sourmash('scripts', 'index', siglist, - '-o', output, toggle_internal_storage) + runtmp.sourmash("scripts", "index", siglist, "-o", output, toggle_internal_storage) - runtmp.sourmash('scripts', 'check', output) + runtmp.sourmash("scripts", "check", output) print(runtmp.last_result.err) - assert 'index is ok' in runtmp.last_result.err + assert "index is ok" in runtmp.last_result.err def test_index_check_quick(runtmp, toggle_internal_storage): # test check index - siglist = runtmp.output('db-sigs.txt') + siglist = runtmp.output("db-sigs.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") make_file_list(siglist, [sig2, sig47]) - output = runtmp.output('db.rocksdb') + output = runtmp.output("db.rocksdb") - runtmp.sourmash('scripts', 'index', siglist, - '-o', output, toggle_internal_storage) + runtmp.sourmash("scripts", "index", siglist, "-o", output, toggle_internal_storage) - runtmp.sourmash('scripts', 'check', '--quick', output) + runtmp.sourmash("scripts", "check", 
"--quick", output) print(runtmp.last_result.err) - assert 'index is ok' in runtmp.last_result.err + assert "index is ok" in runtmp.last_result.err def test_index_subdir(runtmp, toggle_internal_storage): # test basic index & output to subdir - siglist = runtmp.output('db-sigs.txt') + siglist = runtmp.output("db-sigs.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(siglist, [sig2, sig47, sig63]) - os.mkdir(runtmp.output('subdir')) - output = runtmp.output('subdir/db.rocksdb') + os.mkdir(runtmp.output("subdir")) + output = runtmp.output("subdir/db.rocksdb") - runtmp.sourmash('scripts', 'index', siglist, - '-o', output, toggle_internal_storage) + runtmp.sourmash("scripts", "index", siglist, "-o", output, toggle_internal_storage) assert os.path.exists(output) print(runtmp.last_result.err) - runtmp.sourmash('scripts', 'check', output) + runtmp.sourmash("scripts", "check", output) diff --git a/src/python/tests/test_manysearch.py b/src/python/tests/test_manysearch.py index 4750d9d6..f56708c7 100644 --- a/src/python/tests/test_manysearch.py +++ b/src/python/tests/test_manysearch.py @@ -4,70 +4,75 @@ import sourmash from . import sourmash_tst_utils as utils -from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist, - index_siglist) +from .sourmash_tst_utils import ( + get_test_data, + make_file_list, + zip_siglist, + index_siglist, +) def test_installed(runtmp): with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysearch') + runtmp.sourmash("scripts", "manysearch") - assert 'usage: manysearch' in runtmp.last_result.err + assert "usage: manysearch" in runtmp.last_result.err def test_simple(runtmp, zip_query, zip_against): # test basic execution! - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) - runtmp.sourmash('scripts', 'manysearch', query_list, against_list, - '-o', output, '-t', '0.01') + runtmp.sourmash( + "scripts", "manysearch", query_list, against_list, "-o", output, "-t", "0.01" + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 5 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # identical? 
- if row['match_name'] == row['query_name']: - assert row['query_md5'] == row['match_md5'], row - assert float(row['containment'] == 1.0) - assert float(row['jaccard'] == 1.0) - assert float(row['max_containment'] == 1.0) - assert float(row['query_containment_ani'] == 1.0) - assert float(row['match_containment_ani'] == 1.0) - assert float(row['average_containment_ani'] == 1.0) - assert float(row['max_containment_ani'] == 1.0) + if row["match_name"] == row["query_name"]: + assert row["query_md5"] == row["match_md5"], row + assert float(row["containment"] == 1.0) + assert float(row["jaccard"] == 1.0) + assert float(row["max_containment"] == 1.0) + assert float(row["query_containment_ani"] == 1.0) + assert float(row["match_containment_ani"] == 1.0) + assert float(row["average_containment_ani"] == 1.0) + assert float(row["max_containment_ani"] == 1.0) else: # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) - query_ani = float(row['query_containment_ani']) - match_ani = float(row['match_containment_ani']) - average_ani = float(row['average_containment_ani']) - max_ani = float(row['max_containment_ani']) + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + query_ani = float(row["query_containment_ani"]) + match_ani = float(row["match_containment_ani"]) + average_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) jaccard = round(jaccard, 4) cont = round(cont, 4) maxcont = round(maxcont, 4) @@ -75,10 +80,19 @@ def test_simple(runtmp, zip_query, zip_against): match_ani = round(match_ani, 4) average_ani = round(average_ani, 4) max_ani = round(max_ani, 4) - print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}", - f"{query_ani:.04}", f"{match_ani:.04}", f"{average_ani:.04}", f"{max_ani:.04}") - - if q == 'NC_011665.1' and m == 'NC_009661.1': + print( + q, + m, + f"{jaccard:.04}", + f"{cont:.04}", + f"{maxcont:.04}", + f"{query_ani:.04}", + f"{match_ani:.04}", + f"{average_ani:.04}", + f"{max_ani:.04}", + ) + + if q == "NC_011665.1" and m == "NC_009661.1": assert jaccard == 0.3207 assert cont == 0.4828 assert maxcont == 0.4885 @@ -88,7 +102,7 @@ def test_simple(runtmp, zip_query, zip_against): assert average_ani == 0.977 assert max_ani == 0.9772 - if q == 'NC_009661.1' and m == 'NC_011665.1': + if q == "NC_009661.1" and m == "NC_011665.1": assert jaccard == 0.3207 assert cont == 0.4885 assert maxcont == 0.4885 @@ -101,38 +115,49 @@ def test_simple(runtmp, zip_query, zip_against): def test_simple_abund(runtmp): # test with abund sig - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') - query_list = runtmp.output('query.txt') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") + query_list = runtmp.output("query.txt") make_file_list(query_list, [sig2, sig47, sig63]) - against = get_test_data('SRR606249.sig.gz') - - output = runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'manysearch', query_list, against, - '-o', output, '--scaled', '100000', '-k', '31', - '-t', '0.01') + against = get_test_data("SRR606249.sig.gz") + + output = 
runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", + "manysearch", + query_list, + against, + "-o", + output, + "--scaled", + "100000", + "-k", + "31", + "-t", + "0.01", + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 3 - dd = df.to_dict(orient='index') - dd = list(sorted(dd.values(), key=lambda x: x['query_name'])) + dd = df.to_dict(orient="index") + dd = list(sorted(dd.values(), key=lambda x: x["query_name"])) print(dd) row = dd[0] - query_name = row['query_name'].split()[0] - average_abund = round(float(row['average_abund']), 4) - median_abund = round(float(row['median_abund']), 4) - std_abund = round(float(row['std_abund']), 4) - n_weighted_found = int(row['n_weighted_found']) - total_weighted_hashes = int(row['total_weighted_hashes']) - - assert query_name == 'CP001071.1' + query_name = row["query_name"].split()[0] + average_abund = round(float(row["average_abund"]), 4) + median_abund = round(float(row["median_abund"]), 4) + std_abund = round(float(row["std_abund"]), 4) + n_weighted_found = int(row["n_weighted_found"]) + total_weighted_hashes = int(row["total_weighted_hashes"]) + + assert query_name == "CP001071.1" assert average_abund == round(21.045454545454500, 4) assert median_abund == 21.5 assert std_abund == round(5.644605411181010, 4) @@ -140,14 +165,14 @@ def test_simple_abund(runtmp): assert total_weighted_hashes == 73489 row = dd[1] - query_name = row['query_name'].split()[0] - average_abund = round(float(row['average_abund']), 4) - median_abund = round(float(row['median_abund']), 4) - std_abund = round(float(row['std_abund']), 4) - n_weighted_found = int(row['n_weighted_found']) - total_weighted_hashes = int(row['total_weighted_hashes']) - - assert query_name == 'NC_009661.1' + query_name = row["query_name"].split()[0] + average_abund = round(float(row["average_abund"]), 4) + median_abund = round(float(row["median_abund"]), 4) + std_abund = round(float(row["std_abund"]), 4) + n_weighted_found = int(row["n_weighted_found"]) + total_weighted_hashes = int(row["total_weighted_hashes"]) + + assert query_name == "NC_009661.1" assert average_abund == round(11.365853658536600, 4) assert median_abund == 11.0 assert std_abund == round(4.976805212676670, 4) @@ -155,14 +180,14 @@ def test_simple_abund(runtmp): assert total_weighted_hashes == 73489 row = dd[2] - query_name = row['query_name'].split()[0] - average_abund = round(float(row['average_abund']), 4) - median_abund = round(float(row['median_abund']), 4) - std_abund = round(float(row['std_abund']), 4) - n_weighted_found = int(row['n_weighted_found']) - total_weighted_hashes = int(row['total_weighted_hashes']) - - assert query_name == 'NC_011665.1' + query_name = row["query_name"].split()[0] + average_abund = round(float(row["average_abund"]), 4) + median_abund = round(float(row["median_abund"]), 4) + std_abund = round(float(row["std_abund"]), 4) + n_weighted_found = int(row["n_weighted_found"]) + total_weighted_hashes = int(row["total_weighted_hashes"]) + + assert query_name == "NC_011665.1" assert average_abund == round(10.386363636363600, 4) assert median_abund == 10.5 assert std_abund == round(6.932190750047300, 4) @@ -172,59 +197,60 @@ def test_simple_abund(runtmp): def test_simple_indexed(runtmp, zip_query, indexed_query): # test basic execution! 
- query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) if indexed_query: - query_list = index_siglist(runtmp, query_list, runtmp.output('query_db')) + query_list = index_siglist(runtmp, query_list, runtmp.output("query_db")) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) - print('query_list is:', query_list) - runtmp.sourmash('scripts', 'manysearch', query_list, against_list, - '-o', output, '-t', '0.01') + print("query_list is:", query_list) + runtmp.sourmash( + "scripts", "manysearch", query_list, against_list, "-o", output, "-t", "0.01" + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 5 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # identical? - if row['match_name'] == row['query_name']: - assert float(row['containment'] == 1.0) - assert float(row['query_containment_ani'] == 1.0) + if row["match_name"] == row["query_name"]: + assert float(row["containment"] == 1.0) + assert float(row["query_containment_ani"] == 1.0) else: # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - intersect_hashes = int(row['intersect_hashes']) - query_ani = float(row['query_containment_ani']) + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + intersect_hashes = int(row["intersect_hashes"]) + query_ani = float(row["query_containment_ani"]) cont = round(cont, 4) query_ani = round(query_ani, 4) print(q, m, f"{cont:.04}", f"{query_ani:.04}") - if q == 'NC_011665.1' and m == 'NC_009661.1': + if q == "NC_011665.1" and m == "NC_009661.1": assert cont == 0.4828 assert intersect_hashes == 2529 assert query_ani == 0.9768 - if q == 'NC_009661.1' and m == 'NC_011665.1': + if q == "NC_009661.1" and m == "NC_011665.1": assert cont == 0.4885 assert intersect_hashes == 2529 assert query_ani == 0.9772 @@ -232,50 +258,51 @@ def test_simple_indexed(runtmp, zip_query, indexed_query): def test_simple_list_of_zips(runtmp): # test basic execution! 
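    # here each input is already a standalone .sig.zip; zip_siglist, used by
    # the zip_query fixtures above, presumably just bundles a pathlist the
    # same way other tests in this file do by hand with 'sig cat':
    #
    #     def zip_siglist(runtmp, siglist, db):
    #         runtmp.sourmash("sig", "cat", siglist, "-o", db)
    #         return db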
- query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.sig.zip') - sig47 = get_test_data('47.sig.zip') - sig63 = get_test_data('63.sig.zip') + sig2 = get_test_data("2.sig.zip") + sig47 = get_test_data("47.sig.zip") + sig63 = get_test_data("63.sig.zip") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'manysearch', query_list, against_list, - '-o', output, '-t', '0.01') + runtmp.sourmash( + "scripts", "manysearch", query_list, against_list, "-o", output, "-t", "0.01" + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 5 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # identical? - if row['match_name'] == row['query_name']: - assert float(row['containment'] == 1.0) - assert float(row['query_containment_ani'] == 1.0) + if row["match_name"] == row["query_name"]: + assert float(row["containment"] == 1.0) + assert float(row["query_containment_ani"] == 1.0) else: # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - intersect_hashes = int(row['intersect_hashes']) - query_ani = float(row['query_containment_ani']) + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + intersect_hashes = int(row["intersect_hashes"]) + query_ani = float(row["query_containment_ani"]) cont = round(cont, 4) query_ani = round(query_ani, 4) print(q, m, f"{cont:.04}", f"{query_ani:.04}") - if q == 'NC_011665.1' and m == 'NC_009661.1': + if q == "NC_011665.1" and m == "NC_009661.1": assert cont == 0.4828 assert intersect_hashes == 2529 assert query_ani == 0.9768 - if q == 'NC_009661.1' and m == 'NC_011665.1': + if q == "NC_009661.1" and m == "NC_011665.1": assert cont == 0.4885 assert intersect_hashes == 2529 assert query_ani == 0.9772 @@ -283,26 +310,36 @@ def test_simple_list_of_zips(runtmp): def test_simple_with_cores(runtmp, capfd, indexed, zip_query): # test basic execution with -c argument (that it runs, at least!) 
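# make_file_list (from sourmash_tst_utils) writes a plain pathlist, one
# path per line; as test_simple_list_of_zips above shows, each entry may
# itself be a .sig.zip collection. Assumed shape of the helper:

def make_file_list(filename, paths):
    with open(filename, "w") as fp:
        for path in paths:
            fp.write(f"{path}\n")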
- query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) - - output = runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'manysearch', query_list, against_list, - '-o', output, '-c', '4', '-t', '0.01') + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) + + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", + "manysearch", + query_list, + against_list, + "-o", + output, + "-c", + "4", + "-t", + "0.01", + ) assert os.path.exists(output) df = pandas.read_csv(output) @@ -315,26 +352,27 @@ def test_simple_with_cores(runtmp, capfd, indexed, zip_query): def test_simple_threshold(runtmp, indexed, zip_query): # test with a simple threshold => only 3 results - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'manysearch', query_list, against_list, - '-o', output, '-t', '0.5') + runtmp.sourmash( + "scripts", "manysearch", query_list, against_list, "-o", output, "-t", "0.5" + ) assert os.path.exists(output) df = pandas.read_csv(output) @@ -343,12 +381,12 @@ def test_simple_threshold(runtmp, indexed, zip_query): def test_simple_manifest(runtmp, indexed): # test with a simple threshold => only 3 results - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) @@ -360,14 +398,15 @@ def test_simple_manifest(runtmp, indexed): runtmp.sourmash("sig", "manifest", against_list, "-o", against_mf) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) else: 
against_list = against_mf - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'manysearch', query_mf, against_list, - '-o', output, '-t', '0.5') + runtmp.sourmash( + "scripts", "manysearch", query_mf, against_list, "-o", output, "-t", "0.5" + ) assert os.path.exists(output) df = pandas.read_csv(output) @@ -376,148 +415,145 @@ def test_simple_manifest(runtmp, indexed): def test_missing_query(runtmp, capfd, indexed, zip_query): # test with a missing query list - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - #make_file_list(query_list, [sig2, sig47, sig63]) # don't make query + # make_file_list(query_list, [sig2, sig47, sig63]) # don't make query make_file_list(against_list, [sig2, sig47, sig63]) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) if zip_query: - query_list = runtmp.output('query.zip') + query_list = runtmp.output("query.zip") - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "manysearch", query_list, against_list, "-o", output) captured = capfd.readouterr() print(captured.err) - assert 'Error: No such file or directory' in captured.err + assert "Error: No such file or directory" in captured.err def test_sig_query(runtmp, capfd, indexed): # test with a single sig query (a .sig.gz file) - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, sig47, sig63]) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'manysearch', sig2, against_list, - '-o', output) + runtmp.sourmash("scripts", "manysearch", sig2, against_list, "-o", output) def test_bad_query_2(runtmp, capfd, indexed): # test with a bad query list (a missing file) - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, "no-exist"]) make_file_list(against_list, [sig2, sig47, sig63]) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) - output = runtmp.output('out.csv') + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) + output = runtmp.output("out.csv") - 
runtmp.sourmash('scripts', 'manysearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "manysearch", query_list, against_list, "-o", output) captured = capfd.readouterr() print(captured.err) assert "WARNING: could not load sketches from path 'no-exist'" in captured.err - assert "WARNING: 1 query paths failed to load. See error messages above." in captured.err + assert ( + "WARNING: 1 query paths failed to load. See error messages above." + in captured.err + ) def test_bad_query_3(runtmp, capfd): # test with a bad query (a .sig.gz file renamed as zip file) - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_zip = runtmp.output('query.zip') + query_zip = runtmp.output("query.zip") # cp sig2 into query_zip - with open(query_zip, 'wb') as fp: - with open(sig2, 'rb') as fp2: + with open(query_zip, "wb") as fp: + with open(sig2, "rb") as fp2: fp.write(fp2.read()) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'multisearch', query_zip, against_list, - '-o', output) + runtmp.sourmash("scripts", "multisearch", query_zip, against_list, "-o", output) captured = capfd.readouterr() print(captured.err) - assert 'InvalidArchive' in captured.err + assert "InvalidArchive" in captured.err def test_missing_against(runtmp, capfd, indexed): # test with a missing against list - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) # do not create against_list - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "manysearch", query_list, against_list, "-o", output) captured = capfd.readouterr() print(captured.err) - assert 'Error: No such file or directory' in captured.err + assert "Error: No such file or directory" in captured.err def test_nomatch_against(runtmp, capfd): # nonmatching against file (num sig) - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') - nomatch_sketch = get_test_data('SRR606249.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") + nomatch_sketch = get_test_data("SRR606249.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [nomatch_sketch]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysearch', query_list, 
against_list, - '-o', output) + runtmp.sourmash("scripts", "manysearch", query_list, against_list, "-o", output) captured = capfd.readouterr() assert "No search signatures loaded, exiting." in captured.err @@ -525,47 +561,48 @@ def test_nomatch_against(runtmp, capfd): def test_bad_against(runtmp, capfd): # test with a bad against list (a missing file) - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, "no-exist"]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'manysearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "manysearch", query_list, against_list, "-o", output) captured = capfd.readouterr() print(captured.err) assert "WARNING: could not load sketches from path 'no-exist'" in captured.err - assert "WARNING: 1 search paths failed to load. See error messages above." in captured.err + assert ( + "WARNING: 1 search paths failed to load. See error messages above." + in captured.err + ) def test_empty_query(runtmp, indexed, capfd): # test with an empty query list - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, []) make_file_list(against_list, [sig2, sig47, sig63]) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "manysearch", query_list, against_list, "-o", output) print(runtmp.last_result.err) captured = capfd.readouterr() @@ -575,91 +612,88 @@ def test_empty_query(runtmp, indexed, capfd): def test_nomatch_query(runtmp, capfd, indexed, zip_query): # test a non-matching (diff ksize) in query; do we get warning message? 
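# Background for this test: 1.fa.k21.sig.gz is a k=21 sketch while the
# against sketches are k=31, so manysearch should skip that one query
# path with the warning asserted below. A quick way to inspect the
# ksizes bundled in a signature file with the sourmash API (sketch):

import sourmash

def ksizes_in(path):
    return sorted(ss.minhash.ksize for ss in sourmash.load_file_as_signatures(path))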
- query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig1 = get_test_data('1.fa.k21.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig1 = get_test_data("1.fa.k21.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63, sig1]) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) - runtmp.sourmash('scripts', 'manysearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "manysearch", query_list, against_list, "-o", output) assert os.path.exists(output) captured = capfd.readouterr() print(captured.err) - assert 'WARNING: skipped 1 query paths - no compatible signatures.' in captured.err + assert "WARNING: skipped 1 query paths - no compatible signatures." in captured.err def test_load_only_one_bug(runtmp, capfd, indexed, zip_against): # check that we behave properly when presented with multiple against # sketches - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig1_k31 = get_test_data('1.fa.k31.sig.gz') + sig1_k31 = get_test_data("1.fa.k31.sig.gz") # note: this was created as a 3-sketch-in-one-signature directly # via sourmash sketch dna -p k=21,k=31,k=51. - sig1_all = get_test_data('1.combined.sig.gz') + sig1_all = get_test_data("1.combined.sig.gz") make_file_list(query_list, [sig1_k31]) make_file_list(against_list, [sig1_all]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_against: - against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("against.zip")) elif indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) - runtmp.sourmash('scripts', 'manysearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "manysearch", query_list, against_list, "-o", output) assert os.path.exists(output) captured = capfd.readouterr() print(captured.err) - assert not 'WARNING: skipped 1 paths - no compatible signatures.' in captured.err - assert not 'WARNING: no compatible sketches in path ' in captured.err + assert not "WARNING: skipped 1 paths - no compatible signatures." in captured.err + assert not "WARNING: no compatible sketches in path " in captured.err def test_load_only_one_bug_as_query(runtmp, capfd, indexed, zip_query): # check that we behave properly when presented with multiple query # sketches in one file, with only one matching. 
- query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig1_k31 = get_test_data('1.fa.k31.sig.gz') + sig1_k31 = get_test_data("1.fa.k31.sig.gz") # note: this was created as a 3-sketch-in-one-signature directly # via sourmash sketch dna -p k=21,k=31,k=51. - sig1_all = get_test_data('1.combined.sig.gz') + sig1_all = get_test_data("1.combined.sig.gz") make_file_list(query_list, [sig1_all]) make_file_list(against_list, [sig1_k31]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) - runtmp.sourmash('scripts', 'manysearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "manysearch", query_list, against_list, "-o", output) assert os.path.exists(output) @@ -667,46 +701,47 @@ def test_load_only_one_bug_as_query(runtmp, capfd, indexed, zip_query): print(captured.err) print(runtmp.last_result.out) - assert not 'WARNING: skipped 1 paths - no compatible signatures.' in captured.err - assert not 'WARNING: no compatible sketches in path ' in captured.err + assert not "WARNING: skipped 1 paths - no compatible signatures." in captured.err + assert not "WARNING: no compatible sketches in path " in captured.err def test_md5(runtmp, indexed, zip_query): # test that md5s match what was in the original files, not downsampled etc. - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') - + output = runtmp.output("out.csv") + if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) - runtmp.sourmash('scripts', 'manysearch', query_list, against_list, - '-o', output, '-t', '0.01') + runtmp.sourmash( + "scripts", "manysearch", query_list, against_list, "-o", output, "-t", "0.01" + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 5 - md5s = list(df['query_md5']) + md5s = list(df["query_md5"]) print(md5s) for query_file in (sig2, sig47, sig63): for ss in sourmash.load_file_as_signatures(query_file, ksize=31): assert ss.md5sum() in md5s - if not indexed: # indexed search cannot produce match_md5 - md5s = list(df['match_md5']) + if not indexed: # indexed search cannot produce match_md5 + md5s = list(df["match_md5"]) print(md5s) for against_file in (sig2, sig47, sig63): @@ -716,45 +751,58 @@ def test_md5(runtmp, indexed, zip_query): def test_simple_protein(runtmp): # test basic execution with proteins - protsigs = get_test_data('protein.zip') - output = 
runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'manysearch', protsigs, protsigs, - '-k', '19', '-s', '100', '--moltype', 'protein', - '-o', output, '-t', '0.01') + protsigs = get_test_data("protein.zip") + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", + "manysearch", + protsigs, + protsigs, + "-k", + "19", + "-s", + "100", + "--moltype", + "protein", + "-o", + output, + "-t", + "0.01", + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 4 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): print(row) # identical? - if row['match_name'] == row['query_name']: - assert row['query_md5'] == row['match_md5'], row - assert float(row['containment'] == 1.0) - assert float(row['jaccard'] == 1.0) - assert float(row['max_containment'] == 1.0) - assert float(row['query_containment_ani']) == 1.0 - assert float(row['match_containment_ani']) == 1.0 - assert float(row['average_containment_ani']) == 1.0 - assert float(row['max_containment_ani']) == 1.0 + if row["match_name"] == row["query_name"]: + assert row["query_md5"] == row["match_md5"], row + assert float(row["containment"] == 1.0) + assert float(row["jaccard"] == 1.0) + assert float(row["max_containment"] == 1.0) + assert float(row["query_containment_ani"]) == 1.0 + assert float(row["match_containment_ani"]) == 1.0 + assert float(row["average_containment_ani"]) == 1.0 + assert float(row["max_containment_ani"]) == 1.0 else: - # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) - query_ani = float(row['query_containment_ani']) - match_ani = float(row['match_containment_ani']) - average_ani = float(row['average_containment_ani']) - max_ani = float(row['max_containment_ani']) + # confirm hand-checked numbers + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + query_ani = float(row["query_containment_ani"]) + match_ani = float(row["match_containment_ani"]) + average_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) jaccard = round(jaccard, 4) cont = round(cont, 4) @@ -763,73 +811,98 @@ def test_simple_protein(runtmp): match_ani = round(match_ani, 4) average_ani = round(average_ani, 4) max_ani = round(max_ani, 4) - print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}", intersect_hashes, f"{query_ani:.04}", f"{match_ani:.04}", f"{average_ani:.04}", f"{max_ani:.04}") - - if q == 'GCA_001593925' and m == 'GCA_001593935': + print( + q, + m, + f"{jaccard:.04}", + f"{cont:.04}", + f"{maxcont:.04}", + intersect_hashes, + f"{query_ani:.04}", + f"{match_ani:.04}", + f"{average_ani:.04}", + f"{max_ani:.04}", + ) + + if q == "GCA_001593925" and m == "GCA_001593935": assert jaccard == 0.0434 assert cont == 0.1003 assert maxcont == 0.1003 assert intersect_hashes == 342 - assert query_ani == 0.9605 + assert query_ani == 0.9605 assert match_ani == 0.9547 assert average_ani == 0.9576 assert max_ani == 0.9605 - if q == 'GCA_001593935' and m == 'GCA_001593925': + if q == "GCA_001593935" and m == "GCA_001593925": assert jaccard == 0.0434 assert cont == 0.0712 assert maxcont == 0.1003 assert intersect_hashes == 342 - assert query_ani 
== 0.9547 - assert match_ani == 0.9605 + assert query_ani == 0.9547 + assert match_ani == 0.9605 assert average_ani == 0.9576 assert max_ani == 0.9605 def test_simple_protein_indexed(runtmp): # test basic execution with proteins - protsigs = get_test_data('protein.zip') - output = runtmp.output('out.csv') - - protsigs_db = index_siglist(runtmp, protsigs, runtmp.output('db'), - ksize=19, moltype='protein', scaled=100) - - runtmp.sourmash('scripts', 'manysearch', protsigs, protsigs_db, - '-k', '19', '-s', '100', '--moltype', 'protein', - '-o', output, '-t', '0.01') + protsigs = get_test_data("protein.zip") + output = runtmp.output("out.csv") + + protsigs_db = index_siglist( + runtmp, protsigs, runtmp.output("db"), ksize=19, moltype="protein", scaled=100 + ) + + runtmp.sourmash( + "scripts", + "manysearch", + protsigs, + protsigs_db, + "-k", + "19", + "-s", + "100", + "--moltype", + "protein", + "-o", + output, + "-t", + "0.01", + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 4 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): print(row) # identical? - if row['match_name'] == row['query_name']: - assert float(row['containment'] == 1.0) - assert float(row['query_containment_ani'] == 1.0) + if row["match_name"] == row["query_name"]: + assert float(row["containment"] == 1.0) + assert float(row["query_containment_ani"] == 1.0) else: - # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - query_ani = float(row['query_containment_ani']) - intersect_hashes = int(row['intersect_hashes']) + # confirm hand-checked numbers + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + query_ani = float(row["query_containment_ani"]) + intersect_hashes = int(row["intersect_hashes"]) cont = round(cont, 4) query_ani = round(query_ani, 4) print(q, m, f"{cont:.04}", intersect_hashes, f"{query_ani:.04}") - if q == 'GCA_001593925' and m == 'GCA_001593935': + if q == "GCA_001593925" and m == "GCA_001593935": assert cont == 0.1003 assert intersect_hashes == 342 assert query_ani == 0.9605 - if q == 'GCA_001593935' and m == 'GCA_001593925': + if q == "GCA_001593935" and m == "GCA_001593925": assert cont == 0.0712 assert intersect_hashes == 342 assert query_ani == 0.9547 @@ -837,46 +910,59 @@ def test_simple_protein_indexed(runtmp): def test_simple_dayhoff(runtmp): # test basic execution with dayhoff - protsigs = get_test_data('dayhoff.zip') - output = runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'manysearch', protsigs, protsigs, - '-k', '19', '-s', '100', '--moltype', 'dayhoff', - '-o', output, '-t', '0.01') + protsigs = get_test_data("dayhoff.zip") + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", + "manysearch", + protsigs, + protsigs, + "-k", + "19", + "-s", + "100", + "--moltype", + "dayhoff", + "-o", + output, + "-t", + "0.01", + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 4 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): print(row) # identical? 
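# The cross-match ANI values checked in the protein, dayhoff, and hp
# tests follow from containment via ANI ~= containment ** (1/k), where k
# appears to be the DNA-equivalent length (3 * amino-acid k). A sketch
# reproducing the hand-checked numbers with k=19 amino acids:

k_dna = 3 * 19
for cont, expected_ani in [
    (0.1003, 0.9605),  # protein: GCA_001593925 in GCA_001593935
    (0.2815, 0.978),   # dayhoff
    (0.747, 0.9949),   # hp
]:
    assert round(cont ** (1 / k_dna), 4) == expected_ani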
- if row['match_name'] == row['query_name']: - assert row['query_md5'] == row['match_md5'], row - assert float(row['containment'] == 1.0) - assert float(row['jaccard'] == 1.0) - assert float(row['max_containment'] == 1.0) - assert float(row['query_containment_ani']) == 1.0 - assert float(row['match_containment_ani']) == 1.0 - assert float(row['average_containment_ani']) == 1.0 - assert float(row['max_containment_ani']) == 1.0 - + if row["match_name"] == row["query_name"]: + assert row["query_md5"] == row["match_md5"], row + assert float(row["containment"] == 1.0) + assert float(row["jaccard"] == 1.0) + assert float(row["max_containment"] == 1.0) + assert float(row["query_containment_ani"]) == 1.0 + assert float(row["match_containment_ani"]) == 1.0 + assert float(row["average_containment_ani"]) == 1.0 + assert float(row["max_containment_ani"]) == 1.0 + else: - # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) - query_ani = float(row['query_containment_ani']) - match_ani = float(row['match_containment_ani']) - average_ani = float(row['average_containment_ani']) - max_ani = float(row['max_containment_ani']) + # confirm hand-checked numbers + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + query_ani = float(row["query_containment_ani"]) + match_ani = float(row["match_containment_ani"]) + average_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) jaccard = round(jaccard, 4) cont = round(cont, 4) @@ -885,9 +971,20 @@ def test_simple_dayhoff(runtmp): match_ani = round(match_ani, 4) average_ani = round(average_ani, 4) max_ani = round(max_ani, 4) - print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}", intersect_hashes, f"{query_ani:.04}", f"{match_ani:.04}", f"{average_ani:.04}", f"{max_ani:.04}") - - if q == 'GCA_001593925' and m == 'GCA_001593935': + print( + q, + m, + f"{jaccard:.04}", + f"{cont:.04}", + f"{maxcont:.04}", + intersect_hashes, + f"{query_ani:.04}", + f"{match_ani:.04}", + f"{average_ani:.04}", + f"{max_ani:.04}", + ) + + if q == "GCA_001593925" and m == "GCA_001593935": assert jaccard == 0.1326 assert cont == 0.2815 assert maxcont == 0.2815 @@ -897,7 +994,7 @@ def test_simple_dayhoff(runtmp): assert average_ani == 0.9751 assert max_ani == 0.978 - if q == 'GCA_001593935' and m == 'GCA_001593925': + if q == "GCA_001593935" and m == "GCA_001593925": assert jaccard == 0.1326 assert cont == 0.2004 assert maxcont == 0.2815 @@ -910,48 +1007,62 @@ def test_simple_dayhoff(runtmp): def test_simple_dayhoff_indexed(runtmp): # test indexed execution with dayhoff - protsigs = get_test_data('dayhoff.zip') - output = runtmp.output('out.csv') - - protsigs_db = index_siglist(runtmp, protsigs, runtmp.output('db'), - ksize=19, moltype='dayhoff', scaled=100) - - runtmp.sourmash('scripts', 'manysearch', protsigs, protsigs_db, - '-k', '19', '-s', '100', '--moltype', 'dayhoff', - '-o', output, '-t', '0.01') + protsigs = get_test_data("dayhoff.zip") + output = runtmp.output("out.csv") + + protsigs_db = index_siglist( + runtmp, protsigs, runtmp.output("db"), ksize=19, moltype="dayhoff", scaled=100 + ) + + runtmp.sourmash( + "scripts", + "manysearch", + protsigs, + 
protsigs_db, + "-k", + "19", + "-s", + "100", + "--moltype", + "dayhoff", + "-o", + output, + "-t", + "0.01", + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 4 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): print(row) # identical? - if row['match_name'] == row['query_name']: - assert float(row['containment'] == 1.0) - assert float(row['query_containment_ani'] == 1.0) + if row["match_name"] == row["query_name"]: + assert float(row["containment"] == 1.0) + assert float(row["query_containment_ani"] == 1.0) else: - # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - query_ani = float(row['query_containment_ani']) - intersect_hashes = int(row['intersect_hashes']) + # confirm hand-checked numbers + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + query_ani = float(row["query_containment_ani"]) + intersect_hashes = int(row["intersect_hashes"]) cont = round(cont, 4) query_ani = round(query_ani, 4) print(q, m, f"{cont:.04}", intersect_hashes, f"{query_ani:.04}") - if q == 'GCA_001593925' and m == 'GCA_001593935': + if q == "GCA_001593925" and m == "GCA_001593935": assert cont == 0.2815 assert intersect_hashes == 930 assert query_ani == 0.978 - if q == 'GCA_001593935' and m == 'GCA_001593925': + if q == "GCA_001593935" and m == "GCA_001593925": assert cont == 0.2004 assert intersect_hashes == 930 assert query_ani == 0.9722 @@ -959,45 +1070,58 @@ def test_simple_dayhoff_indexed(runtmp): def test_simple_hp(runtmp): # test basic execution with hp - protsigs = get_test_data('hp.zip') - output = runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'manysearch', protsigs, protsigs, - '-k', '19', '-s', '100', '--moltype', 'hp', - '-o', output, '-t', '0.01') + protsigs = get_test_data("hp.zip") + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", + "manysearch", + protsigs, + protsigs, + "-k", + "19", + "-s", + "100", + "--moltype", + "hp", + "-o", + output, + "-t", + "0.01", + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 4 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): print(row) # identical? 
- if row['match_name'] == row['query_name']: - assert row['query_md5'] == row['match_md5'], row - assert float(row['containment'] == 1.0) - assert float(row['jaccard'] == 1.0) - assert float(row['max_containment'] == 1.0) - assert float(row['query_containment_ani']) == 1.0 - assert float(row['match_containment_ani']) == 1.0 - assert float(row['average_containment_ani']) == 1.0 - assert float(row['max_containment_ani']) == 1.0 + if row["match_name"] == row["query_name"]: + assert row["query_md5"] == row["match_md5"], row + assert float(row["containment"] == 1.0) + assert float(row["jaccard"] == 1.0) + assert float(row["max_containment"] == 1.0) + assert float(row["query_containment_ani"]) == 1.0 + assert float(row["match_containment_ani"]) == 1.0 + assert float(row["average_containment_ani"]) == 1.0 + assert float(row["max_containment_ani"]) == 1.0 else: - # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) - query_ani = float(row['query_containment_ani']) - match_ani = float(row['match_containment_ani']) - average_ani = float(row['average_containment_ani']) - max_ani = float(row['max_containment_ani']) + # confirm hand-checked numbers + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + query_ani = float(row["query_containment_ani"]) + match_ani = float(row["match_containment_ani"]) + average_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) jaccard = round(jaccard, 4) cont = round(cont, 4) @@ -1006,9 +1130,20 @@ def test_simple_hp(runtmp): match_ani = round(match_ani, 4) average_ani = round(average_ani, 4) max_ani = round(max_ani, 4) - print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}", intersect_hashes, f"{query_ani:.04}", f"{match_ani:.04}", f"{average_ani:.04}", f"{max_ani:.04}") - - if q == 'GCA_001593925' and m == 'GCA_001593935': + print( + q, + m, + f"{jaccard:.04}", + f"{cont:.04}", + f"{maxcont:.04}", + intersect_hashes, + f"{query_ani:.04}", + f"{match_ani:.04}", + f"{average_ani:.04}", + f"{max_ani:.04}", + ) + + if q == "GCA_001593925" and m == "GCA_001593935": assert jaccard == 0.4983 assert cont == 0.747 assert maxcont == 0.747 @@ -1018,7 +1153,7 @@ def test_simple_hp(runtmp): assert average_ani == 0.993 assert max_ani == 0.9949 - if q == 'GCA_001593935' and m == 'GCA_001593925': + if q == "GCA_001593935" and m == "GCA_001593925": assert jaccard == 0.4983 assert cont == 0.5994 assert maxcont == 0.747 @@ -1031,49 +1166,63 @@ def test_simple_hp(runtmp): def test_simple_hp_indexed(runtmp): # test indexed execution with hp, indexed - protsigs = get_test_data('hp.zip') - output = runtmp.output('out.csv') - - protsigs_db = index_siglist(runtmp, protsigs, runtmp.output('db'), - ksize=19, moltype='hp', scaled=100) - - runtmp.sourmash('scripts', 'manysearch', protsigs, protsigs_db, - '-k', '19', '-s', '100', '--moltype', 'hp', - '-o', output, '-t', '0.01') + protsigs = get_test_data("hp.zip") + output = runtmp.output("out.csv") + + protsigs_db = index_siglist( + runtmp, protsigs, runtmp.output("db"), ksize=19, moltype="hp", scaled=100 + ) + + runtmp.sourmash( + "scripts", + "manysearch", + protsigs, + protsigs_db, + "-k", + "19", + "-s", + "100", + 
"--moltype", + "hp", + "-o", + output, + "-t", + "0.01", + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 4 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): print(row) # identical? - if row['match_name'] == row['query_name']: - assert float(row['containment'] == 1.0) - assert float(row['query_containment_ani']) == 1.0 + if row["match_name"] == row["query_name"]: + assert float(row["containment"] == 1.0) + assert float(row["query_containment_ani"]) == 1.0 else: - # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - intersect_hashes = int(row['intersect_hashes']) - query_ani = float(row['query_containment_ani']) + # confirm hand-checked numbers + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + intersect_hashes = int(row["intersect_hashes"]) + query_ani = float(row["query_containment_ani"]) cont = round(cont, 4) query_ani = round(query_ani, 4) print(q, m, f"{cont:.04}", intersect_hashes, f"{query_ani:.04}") - if q == 'GCA_001593925' and m == 'GCA_001593935': + if q == "GCA_001593925" and m == "GCA_001593935": assert cont == 0.747 assert intersect_hashes == 1724 assert query_ani == 0.9949 - if q == 'GCA_001593935' and m == 'GCA_001593925': + if q == "GCA_001593935" and m == "GCA_001593925": assert cont == 0.5994 assert intersect_hashes == 1724 assert query_ani == 0.9911 @@ -1081,19 +1230,18 @@ def test_simple_hp_indexed(runtmp): def test_pretty_print(runtmp): # test pretty-printing of output - query = get_test_data('hmp-queries.sig.zip') - against = get_test_data('hmp-against.sig.zip') + query = get_test_data("hmp-queries.sig.zip") + against = get_test_data("hmp-against.sig.zip") - outcsv = runtmp.output('xxx.csv') + outcsv = runtmp.output("xxx.csv") - runtmp.sourmash('scripts', 'manysearch', query, against, - '-o', outcsv) + runtmp.sourmash("scripts", "manysearch", query, against, "-o", outcsv) print(runtmp.last_result.out) # if this fails in the future, it might be because the order of the # output gets shuffled by multithreading. consider refactoring to # do line by line? - expected="""\ + expected = """\ query p_genome avg_abund p_metag metagenome name -------- -------- --------- ------- --------------- B. fragilis I1345 96.7% 7.3 27.5% CD136 @@ -1105,17 +1253,16 @@ def test_pretty_print(runtmp): def test_no_pretty_print(runtmp): # test turning off pretty-printing of output - query = get_test_data('hmp-queries.sig.zip') - against = get_test_data('hmp-against.sig.zip') + query = get_test_data("hmp-queries.sig.zip") + against = get_test_data("hmp-against.sig.zip") - outcsv = runtmp.output('xxx.csv') + outcsv = runtmp.output("xxx.csv") - runtmp.sourmash('scripts', 'manysearch', query, against, - '-o', outcsv, '-N') + runtmp.sourmash("scripts", "manysearch", query, against, "-o", outcsv, "-N") print(runtmp.last_result.out) # if this fails in the future, it might be because the order of the # output gets shuffled by multithreading. consider refactoring to # do line by line? - expected="p_genome" + expected = "p_genome" assert expected not in runtmp.last_result.out diff --git a/src/python/tests/test_multisearch.py b/src/python/tests/test_multisearch.py index 8763c688..43b6715a 100644 --- a/src/python/tests/test_multisearch.py +++ b/src/python/tests/test_multisearch.py @@ -5,79 +5,82 @@ import sourmash from . 
import sourmash_tst_utils as utils -from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist, - index_siglist) +from .sourmash_tst_utils import ( + get_test_data, + make_file_list, + zip_siglist, + index_siglist, +) def test_installed(runtmp): with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'multisearch') + runtmp.sourmash("scripts", "multisearch") - assert 'usage: multisearch' in runtmp.last_result.err + assert "usage: multisearch" in runtmp.last_result.err def test_simple_no_ani(runtmp, zip_query, zip_db): # test basic execution! - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_db: - against_list = zip_siglist(runtmp, against_list, runtmp.output('db.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("db.zip")) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "multisearch", query_list, against_list, "-o", output) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 5 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # identical? 
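# Without --ani, multisearch writes no ANI columns at all, which is why
# the identical-match branch below asserts key *absence* rather than a
# value of 1.0. Confirming the same thing against the CSV header
# directly (sketch):

import pandas

def ani_columns(csv_path):
    df = pandas.read_csv(csv_path)
    return [c for c in df.columns if c.endswith("_containment_ani")]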
- if row['match_name'] == row['query_name']: - assert row['query_md5'] == row['match_md5'], row - assert float(row['containment'] == 1.0) - assert float(row['jaccard'] == 1.0) - assert float(row['max_containment'] == 1.0) - assert 'query_containment_ani' not in row - assert 'match_containment_ani' not in row - assert 'average_containment_ani' not in row - assert 'max_containment_ani' not in row + if row["match_name"] == row["query_name"]: + assert row["query_md5"] == row["match_md5"], row + assert float(row["containment"] == 1.0) + assert float(row["jaccard"] == 1.0) + assert float(row["max_containment"] == 1.0) + assert "query_containment_ani" not in row + assert "match_containment_ani" not in row + assert "average_containment_ani" not in row + assert "max_containment_ani" not in row else: # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) jaccard = round(jaccard, 4) cont = round(cont, 4) maxcont = round(maxcont, 4) print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}") - if q == 'NC_011665.1' and m == 'NC_009661.1': + if q == "NC_011665.1" and m == "NC_009661.1": assert jaccard == 0.3207 assert cont == 0.4828 assert maxcont == 0.4885 assert intersect_hashes == 2529 - if q == 'NC_009661.1' and m == 'NC_011665.1': + if q == "NC_009661.1" and m == "NC_011665.1": assert jaccard == 0.3207 assert cont == 0.4885 assert maxcont == 0.4885 @@ -86,65 +89,65 @@ def test_simple_no_ani(runtmp, zip_query, zip_db): def test_simple_ani(runtmp, zip_query, zip_db, indexed_query, indexed_against): # test basic execution! 
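# The hand-checked ANI values in this test derive from containment via
# the standard k-th-root estimate, ANI ~= containment ** (1/k). With the
# k=31 DNA sketches used here (sketch verifying the numbers below):

k = 31
for cont, expected_ani in [(0.4828, 0.9768), (0.4885, 0.9772)]:
    assert round(cont ** (1 / k), 4) == expected_ani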
- query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_db: - against_list = zip_siglist(runtmp, against_list, runtmp.output('db.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("db.zip")) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) if indexed_query: - query_list = index_siglist(runtmp, query_list, runtmp.output('q_db')) + query_list = index_siglist(runtmp, query_list, runtmp.output("q_db")) if indexed_against: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output("db")) - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output, '--ani') + runtmp.sourmash( + "scripts", "multisearch", query_list, against_list, "-o", output, "--ani" + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 5 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # identical? - if row['match_name'] == row['query_name']: - assert row['query_md5'] == row['match_md5'], row - assert float(row['containment'] == 1.0) - assert float(row['jaccard'] == 1.0) - assert float(row['max_containment'] == 1.0) - assert float(row['query_containment_ani'] == 1.0) - assert float(row['match_containment_ani'] == 1.0) - assert float(row['average_containment_ani'] == 1.0) - assert float(row['max_containment_ani'] == 1.0) + if row["match_name"] == row["query_name"]: + assert row["query_md5"] == row["match_md5"], row + assert float(row["containment"] == 1.0) + assert float(row["jaccard"] == 1.0) + assert float(row["max_containment"] == 1.0) + assert float(row["query_containment_ani"] == 1.0) + assert float(row["match_containment_ani"] == 1.0) + assert float(row["average_containment_ani"] == 1.0) + assert float(row["max_containment_ani"] == 1.0) else: # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) - q1_ani = float(row['query_containment_ani']) - q2_ani = float(row['match_containment_ani']) - avg_ani = float(row['average_containment_ani']) - max_ani = float(row['max_containment_ani']) - + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + q1_ani = float(row["query_containment_ani"]) + q2_ani = float(row["match_containment_ani"]) + avg_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) jaccard = round(jaccard, 4) cont = round(cont, 4) @@ -153,9 +156,19 @@ def test_simple_ani(runtmp, zip_query, zip_db, indexed_query, indexed_against): q2_ani = round(q2_ani, 4) 
avg_ani = round(avg_ani, 4) max_ani = round(max_ani, 4) - print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}", f"{q1_ani:.04}", f"{q2_ani:.04}", f"{avg_ani:.04}", f"{max_ani:.04}") - - if q == 'NC_011665.1' and m == 'NC_009661.1': + print( + q, + m, + f"{jaccard:.04}", + f"{cont:.04}", + f"{maxcont:.04}", + f"{q1_ani:.04}", + f"{q2_ani:.04}", + f"{avg_ani:.04}", + f"{max_ani:.04}", + ) + + if q == "NC_011665.1" and m == "NC_009661.1": assert jaccard == 0.3207 assert cont == 0.4828 assert maxcont == 0.4885 @@ -165,7 +178,7 @@ def test_simple_ani(runtmp, zip_query, zip_db, indexed_query, indexed_against): assert avg_ani == 0.977 assert max_ani == 0.9772 - if q == 'NC_009661.1' and m == 'NC_011665.1': + if q == "NC_009661.1" and m == "NC_011665.1": assert jaccard == 0.3207 assert cont == 0.4885 assert maxcont == 0.4885 @@ -178,53 +191,53 @@ def test_simple_ani(runtmp, zip_query, zip_db, indexed_query, indexed_against): def test_simple_ani_list_of_zips(runtmp): # test basic execution against a pathlist file of zips - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.sig.zip') - sig47 = get_test_data('47.sig.zip') - sig63 = get_test_data('63.sig.zip') + sig2 = get_test_data("2.sig.zip") + sig47 = get_test_data("47.sig.zip") + sig63 = get_test_data("63.sig.zip") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output, '--ani') + runtmp.sourmash( + "scripts", "multisearch", query_list, against_list, "-o", output, "--ani" + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 5 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # identical? 
- if row['match_name'] == row['query_name']: - assert row['query_md5'] == row['match_md5'], row - assert float(row['containment'] == 1.0) - assert float(row['jaccard'] == 1.0) - assert float(row['max_containment'] == 1.0) - assert float(row['query_containment_ani'] == 1.0) - assert float(row['match_containment_ani'] == 1.0) - assert float(row['average_containment_ani'] == 1.0) - assert float(row['max_containment_ani'] == 1.0) + if row["match_name"] == row["query_name"]: + assert row["query_md5"] == row["match_md5"], row + assert float(row["containment"] == 1.0) + assert float(row["jaccard"] == 1.0) + assert float(row["max_containment"] == 1.0) + assert float(row["query_containment_ani"] == 1.0) + assert float(row["match_containment_ani"] == 1.0) + assert float(row["average_containment_ani"] == 1.0) + assert float(row["max_containment_ani"] == 1.0) else: # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) - q1_ani = float(row['query_containment_ani']) - q2_ani = float(row['match_containment_ani']) - avg_ani = float(row['average_containment_ani']) - max_ani = float(row['max_containment_ani']) - + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + q1_ani = float(row["query_containment_ani"]) + q2_ani = float(row["match_containment_ani"]) + avg_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) jaccard = round(jaccard, 4) cont = round(cont, 4) @@ -233,9 +246,19 @@ def test_simple_ani_list_of_zips(runtmp): q2_ani = round(q2_ani, 4) avg_ani = round(avg_ani, 4) max_ani = round(max_ani, 4) - print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}", f"{q1_ani:.04}", f"{q2_ani:.04}", f"{avg_ani:.04}", f"{max_ani:.04}") - - if q == 'NC_011665.1' and m == 'NC_009661.1': + print( + q, + m, + f"{jaccard:.04}", + f"{cont:.04}", + f"{maxcont:.04}", + f"{q1_ani:.04}", + f"{q2_ani:.04}", + f"{avg_ani:.04}", + f"{max_ani:.04}", + ) + + if q == "NC_011665.1" and m == "NC_009661.1": assert jaccard == 0.3207 assert cont == 0.4828 assert maxcont == 0.4885 @@ -245,7 +268,7 @@ def test_simple_ani_list_of_zips(runtmp): assert avg_ani == 0.977 assert max_ani == 0.9772 - if q == 'NC_009661.1' and m == 'NC_011665.1': + if q == "NC_009661.1" and m == "NC_011665.1": assert jaccard == 0.3207 assert cont == 0.4885 assert maxcont == 0.4885 @@ -258,101 +281,110 @@ def test_simple_ani_list_of_zips(runtmp): def test_simple_ani_list_of_csv(runtmp): # test basic execution against a pathlist file of manifests - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.sig.zip') - sig47 = get_test_data('47.sig.zip') - sig63 = get_test_data('63.sig.zip') + sig2 = get_test_data("2.sig.zip") + sig47 = get_test_data("47.sig.zip") + sig63 = get_test_data("63.sig.zip") - runtmp.sourmash('sig', 'collect', sig2, '-o', 'sig2.mf.csv', '-F', 'csv') - runtmp.sourmash('sig', 'collect', sig47, '-o', 'sig47.mf.csv', '-F', 'csv') - runtmp.sourmash('sig', 'collect', sig63, '-o', 'sig63.mf.csv', '-F', 'csv') + runtmp.sourmash("sig", "collect", sig2, "-o", "sig2.mf.csv", "-F", "csv") 
+ runtmp.sourmash("sig", "collect", sig47, "-o", "sig47.mf.csv", "-F", "csv") + runtmp.sourmash("sig", "collect", sig63, "-o", "sig63.mf.csv", "-F", "csv") - make_file_list(query_list, ['sig2.mf.csv', 'sig47.mf.csv', 'sig63.mf.csv']) - make_file_list(against_list, ['sig2.mf.csv', 'sig47.mf.csv', 'sig63.mf.csv']) + make_file_list(query_list, ["sig2.mf.csv", "sig47.mf.csv", "sig63.mf.csv"]) + make_file_list(against_list, ["sig2.mf.csv", "sig47.mf.csv", "sig63.mf.csv"]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output, '--ani') + runtmp.sourmash( + "scripts", "multisearch", query_list, against_list, "-o", output, "--ani" + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 5 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) def test_simple_ani_standalone_manifest(runtmp): # test basic execution of a standalone manifest - against_list = runtmp.output('against.sig.zip') + against_list = runtmp.output("against.sig.zip") - sig2 = get_test_data('2.sig.zip') - sig47 = get_test_data('47.sig.zip') - sig63 = get_test_data('63.sig.zip') + sig2 = get_test_data("2.sig.zip") + sig47 = get_test_data("47.sig.zip") + sig63 = get_test_data("63.sig.zip") - runtmp.sourmash('sig', 'cat', sig2, sig47, sig63, '-o', against_list) + runtmp.sourmash("sig", "cat", sig2, sig47, sig63, "-o", against_list) - picklist_file = runtmp.output('pl.csv') - with open(picklist_file, 'w', newline='') as fp: + picklist_file = runtmp.output("pl.csv") + with open(picklist_file, "w", newline="") as fp: w = csv.writer(fp) - w.writerow(['ident']) - w.writerow(['CP001071.1']) + w.writerow(["ident"]) + w.writerow(["CP001071.1"]) # use picklist to create a standalone manifest - query_csv = runtmp.output('select.mf.csv') - runtmp.sourmash('sig', 'check', '--picklist', - f'{picklist_file}:ident:ident', - '-m', query_csv, against_list) - - output = runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'multisearch', query_csv, against_list, - '-o', output, '--ani') + query_csv = runtmp.output("select.mf.csv") + runtmp.sourmash( + "sig", + "check", + "--picklist", + f"{picklist_file}:ident:ident", + "-m", + query_csv, + against_list, + ) + + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", "multisearch", query_csv, against_list, "-o", output, "--ani" + ) assert os.path.exists(output) df = pandas.read_csv(output) - assert len(df) == 1 # should only be the one, identical match. + assert len(df) == 1 # should only be the one, identical match. - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # identical? 
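# Note: the picklist built above keeps only CP001071.1 (the 2.fa
# sketch), so the standalone manifest selects a single query and the
# sole output row is that query's self-match; every containment and ANI
# column should therefore be exactly 1.0, as asserted below.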
- if row['match_name'] == row['query_name']: - assert row['query_md5'] == row['match_md5'], row - assert float(row['containment'] == 1.0) - assert float(row['jaccard'] == 1.0) - assert float(row['max_containment'] == 1.0) - assert float(row['query_containment_ani'] == 1.0) - assert float(row['match_containment_ani'] == 1.0) - assert float(row['average_containment_ani'] == 1.0) - assert float(row['max_containment_ani'] == 1.0) + if row["match_name"] == row["query_name"]: + assert row["query_md5"] == row["match_md5"], row + assert float(row["containment"] == 1.0) + assert float(row["jaccard"] == 1.0) + assert float(row["max_containment"] == 1.0) + assert float(row["query_containment_ani"] == 1.0) + assert float(row["match_containment_ani"] == 1.0) + assert float(row["average_containment_ani"] == 1.0) + assert float(row["max_containment_ani"] == 1.0) def test_simple_threshold(runtmp, zip_query, zip_db): # test with a simple threshold => only 3 results - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_db: - against_list = zip_siglist(runtmp, against_list, runtmp.output('db.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("db.zip")) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output, '-t', '0.5') + runtmp.sourmash( + "scripts", "multisearch", query_list, against_list, "-o", output, "-t", "0.5" + ) assert os.path.exists(output) df = pandas.read_csv(output) @@ -361,26 +393,27 @@ def test_simple_threshold(runtmp, zip_query, zip_db): def test_simple_manifest(runtmp): # test with a simple threshold => only 3 results - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) - query_mf = runtmp.output('qmf.csv') - against_mf = runtmp.output('amf.csv') + query_mf = runtmp.output("qmf.csv") + against_mf = runtmp.output("amf.csv") runtmp.sourmash("sig", "manifest", query_list, "-o", query_mf) runtmp.sourmash("sig", "manifest", against_list, "-o", against_mf) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'multisearch', query_mf, against_mf, - '-o', output, '-t', '0.5') + runtmp.sourmash( + "scripts", "multisearch", query_mf, against_mf, "-o", output, "-t", "0.5" + ) assert os.path.exists(output) df = pandas.read_csv(output) @@ -389,33 +422,34 @@ def test_simple_manifest(runtmp): def test_lists_of_standalone_manifests(runtmp, capfd): # test pathlists of 
manifests - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - sig2_mf = runtmp.output('2.mf.csv') - runtmp.sourmash('sig', 'collect', sig2, '-o', sig2_mf, '-F', 'csv') - sig47_mf = runtmp.output('47.mf.csv') - runtmp.sourmash('sig', 'collect', sig47, '-o', sig47_mf, '-F', 'csv') - sig63_mf = runtmp.output('63.mf.csv') - runtmp.sourmash('sig', 'collect', sig63, '-o', sig63_mf, '-F', 'csv') + sig2_mf = runtmp.output("2.mf.csv") + runtmp.sourmash("sig", "collect", sig2, "-o", sig2_mf, "-F", "csv") + sig47_mf = runtmp.output("47.mf.csv") + runtmp.sourmash("sig", "collect", sig47, "-o", sig47_mf, "-F", "csv") + sig63_mf = runtmp.output("63.mf.csv") + runtmp.sourmash("sig", "collect", sig63, "-o", sig63_mf, "-F", "csv") make_file_list(query_list, [sig2_mf, sig47_mf, sig63_mf]) make_file_list(against_list, [sig2, sig47, sig63]) - query_mf = runtmp.output('qmf.csv') - against_mf = runtmp.output('amf.csv') + query_mf = runtmp.output("qmf.csv") + against_mf = runtmp.output("amf.csv") runtmp.sourmash("sig", "manifest", query_list, "-o", query_mf) runtmp.sourmash("sig", "manifest", against_list, "-o", against_mf) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'multisearch', query_mf, against_mf, - '-o', output, '-t', '0.5') + runtmp.sourmash( + "scripts", "multisearch", query_mf, against_mf, "-o", output, "-t", "0.5" + ) assert os.path.exists(output) df = pandas.read_csv(output) @@ -427,44 +461,44 @@ def test_lists_of_standalone_manifests(runtmp, capfd): def test_missing_query(runtmp, capfd, zip_query): # test with a missing query list - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_query: - query_list = runtmp.output('query.zip') + query_list = runtmp.output("query.zip") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output) + runtmp.sourmash( + "scripts", "multisearch", query_list, against_list, "-o", output + ) captured = capfd.readouterr() print(captured.err) - assert 'Error: No such file or directory' in captured.err + assert "Error: No such file or directory" in captured.err def test_sig_query(runtmp, capfd): # sig is ok as query now - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'multisearch', sig2, 
against_list, - '-o', output) + runtmp.sourmash("scripts", "multisearch", sig2, against_list, "-o", output) captured = capfd.readouterr() print(captured.err) @@ -476,98 +510,99 @@ def test_sig_query(runtmp, capfd): def test_bad_query(runtmp, capfd): # test with a bad query list (a missing file) - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, "no-exist"]) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "multisearch", query_list, against_list, "-o", output) captured = capfd.readouterr() print(captured.err) assert "WARNING: could not load sketches from path 'no-exist'" in captured.err - assert "WARNING: 1 query paths failed to load. See error messages above." in captured.err + assert ( + "WARNING: 1 query paths failed to load. See error messages above." + in captured.err + ) def test_bad_query_3(runtmp, capfd): # test with a bad query (a .sig.gz file renamed as zip file) - against_list = runtmp.output('against.txt') + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_zip = runtmp.output('query.zip') + query_zip = runtmp.output("query.zip") # cp sig2 into query_zip - with open(query_zip, 'wb') as fp: - with open(sig2, 'rb') as fp2: + with open(query_zip, "wb") as fp: + with open(sig2, "rb") as fp2: fp.write(fp2.read()) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'multisearch', query_zip, against_list, - '-o', output) + runtmp.sourmash("scripts", "multisearch", query_zip, against_list, "-o", output) captured = capfd.readouterr() print(captured.err) - assert 'InvalidArchive' in captured.err + assert "InvalidArchive" in captured.err def test_missing_against(runtmp, capfd, zip_db): # test with a missing against list - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) # do not create against_list if zip_db: - #specify .zip but don't create the file - against_list = runtmp.output('db.zip') + # specify .zip but don't create the file + against_list = runtmp.output("db.zip") - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output) + runtmp.sourmash( + "scripts", "multisearch", query_list, 
against_list, "-o", output + ) captured = capfd.readouterr() print(captured.err) - assert 'Error: No such file or directory' in captured.err + assert "Error: No such file or directory" in captured.err def test_sig_against(runtmp, capfd): # against can be sig now - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'multisearch', query_list, sig2, - '-o', output) + runtmp.sourmash("scripts", "multisearch", query_list, sig2, "-o", output) captured = capfd.readouterr() print(captured.err) @@ -579,44 +614,47 @@ def test_sig_against(runtmp, capfd): def test_bad_against(runtmp, capfd): # test with a bad against list (a missing file) - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, "no-exist"]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "multisearch", query_list, against_list, "-o", output) captured = capfd.readouterr() print(captured.err) assert "WARNING: could not load sketches from path 'no-exist'" in captured.err - assert "WARNING: 1 search paths failed to load. See error messages above." in captured.err + assert ( + "WARNING: 1 search paths failed to load. See error messages above." + in captured.err + ) def test_empty_query(runtmp, capfd): # test with an empty query list - fail with error - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, []) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output) + runtmp.sourmash( + "scripts", "multisearch", query_list, against_list, "-o", output + ) print(runtmp.last_result.err) captured = capfd.readouterr() @@ -626,186 +664,184 @@ def test_empty_query(runtmp, capfd): def test_nomatch_query_warn(runtmp, capfd, zip_query): # test a non-matching (diff ksize) in query; do we get warning message? 
- query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig1 = get_test_data('1.fa.k21.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig1 = get_test_data("1.fa.k21.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63, sig1]) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "multisearch", query_list, against_list, "-o", output) assert os.path.exists(output) captured = capfd.readouterr() print(captured.err) - assert 'WARNING: skipped 1 query paths - no compatible signatures' in captured.err + assert "WARNING: skipped 1 query paths - no compatible signatures" in captured.err def test_nomatch_query_exit(runtmp, capfd, zip_query): # test loading no matching sketches - do we error exit appropriately? - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig1 = get_test_data('1.fa.k21.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig1 = get_test_data("1.fa.k21.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig1]) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output) + runtmp.sourmash( + "scripts", "multisearch", query_list, against_list, "-o", output + ) captured = capfd.readouterr() print(captured.err) - assert 'WARNING: skipped 1 query paths - no compatible signatures' in captured.err - assert 'No query signatures loaded, exiting' in captured.err + assert "WARNING: skipped 1 query paths - no compatible signatures" in captured.err + assert "No query signatures loaded, exiting" in captured.err def test_nomatch_against(runtmp, capfd, zip_query): # test a non-matching (diff ksize) in against; do we get warning message? 
- query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig1 = get_test_data('1.fa.k21.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig1 = get_test_data("1.fa.k21.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63, sig1]) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output, '-k', '21') + runtmp.sourmash( + "scripts", "multisearch", query_list, against_list, "-o", output, "-k", "21" + ) captured = capfd.readouterr() print(captured.err) - assert 'WARNING: skipped 3 search paths - no compatible signatures' in captured.err - assert 'No search signatures loaded, exiting' in captured.err + assert "WARNING: skipped 3 search paths - no compatible signatures" in captured.err + assert "No search signatures loaded, exiting" in captured.err def test_load_only_one_bug(runtmp, capfd, zip_db): # check that we behave properly when presented with multiple against # sketches - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig1_k31 = get_test_data('1.fa.k31.sig.gz') + sig1_k31 = get_test_data("1.fa.k31.sig.gz") # note: this was created as a 3-sketch-in-one-signature directly # via sourmash sketch dna -p k=21,k=31,k=51. - sig1_all = get_test_data('1.combined.sig.gz') + sig1_all = get_test_data("1.combined.sig.gz") make_file_list(query_list, [sig1_k31]) make_file_list(against_list, [sig1_all]) if zip_db: - against_list = zip_siglist(runtmp, against_list, runtmp.output('db.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("db.zip")) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "multisearch", query_list, against_list, "-o", output) assert os.path.exists(output) captured = capfd.readouterr() print(captured.err) - assert not 'WARNING: skipped 1 paths - no compatible signatures.' in captured.err - assert not 'WARNING: no compatible sketches in path' in captured.err + assert not "WARNING: skipped 1 paths - no compatible signatures." in captured.err + assert not "WARNING: no compatible sketches in path" in captured.err def test_load_only_one_bug_as_query(runtmp, capfd, zip_query): # check that we behave properly when presented with multiple query # sketches in one file, with only one matching. - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig1_k31 = get_test_data('1.fa.k31.sig.gz') + sig1_k31 = get_test_data("1.fa.k31.sig.gz") # note: this was created as a 3-sketch-in-one-signature directly # via sourmash sketch dna -p k=21,k=31,k=51. 
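# (editorial sketch, assuming the standard sourmash Python API) selecting a
# single ksize out of the combined three-sketch file would look roughly like:
#
#     import sourmash
#     sigs = list(sourmash.load_file_as_signatures("1.combined.sig.gz", ksize=31))
#     assert len(sigs) == 1
#     assert sigs[0].minhash.ksize == 31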
- sig1_all = get_test_data('1.combined.sig.gz') + sig1_all = get_test_data("1.combined.sig.gz") make_file_list(query_list, [sig1_all]) make_file_list(against_list, [sig1_k31]) if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "multisearch", query_list, against_list, "-o", output) assert os.path.exists(output) captured = capfd.readouterr() print(captured.err) - assert not 'WARNING: skipped 1 paths - no compatible signatures.' in captured.err - assert not 'WARNING: no compatible sketches in path ' in captured.err + assert not "WARNING: skipped 1 paths - no compatible signatures." in captured.err + assert not "WARNING: no compatible sketches in path " in captured.err def test_md5(runtmp, zip_query, zip_db): # test that md5s match what was in the original files, not downsampled etc. - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) if zip_db: - against_list = zip_siglist(runtmp, against_list, runtmp.output('db.zip')) + against_list = zip_siglist(runtmp, against_list, runtmp.output("db.zip")) - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output) + runtmp.sourmash("scripts", "multisearch", query_list, against_list, "-o", output) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 5 - md5s = list(df['query_md5']) + md5s = list(df["query_md5"]) print(md5s) for query_file in (sig2, sig47, sig63): for ss in sourmash.load_file_as_signatures(query_file, ksize=31): assert ss.md5sum() in md5s - md5s = list(df['match_md5']) + md5s = list(df["match_md5"]) print(md5s) for against_file in (sig2, sig47, sig63): @@ -815,45 +851,57 @@ def test_md5(runtmp, zip_query, zip_db): def test_simple_prot(runtmp): # test basic execution with protein sigs - sigs = get_test_data('protein.zip') - - output = runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'multisearch', sigs, sigs, - '-o', output, '--moltype', 'protein', - '-k', '19', '--scaled', '100', '--ani') + sigs = get_test_data("protein.zip") + + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", + "multisearch", + sigs, + sigs, + "-o", + output, + "--moltype", + "protein", + "-k", + "19", + "--scaled", + "100", + "--ani", + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 4 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # identical? 
- if row['match_name'] == row['query_name']: - assert row['query_md5'] == row['match_md5'], row - assert float(row['containment'] == 1.0) - assert float(row['jaccard'] == 1.0) - assert float(row['max_containment'] == 1.0) - assert float(row['query_containment_ani'] == 1.0) - assert float(row['match_containment_ani'] == 1.0) - assert float(row['average_containment_ani'] == 1.0) - assert float(row['max_containment_ani'] == 1.0) + if row["match_name"] == row["query_name"]: + assert row["query_md5"] == row["match_md5"], row + assert float(row["containment"] == 1.0) + assert float(row["jaccard"] == 1.0) + assert float(row["max_containment"] == 1.0) + assert float(row["query_containment_ani"] == 1.0) + assert float(row["match_containment_ani"] == 1.0) + assert float(row["average_containment_ani"] == 1.0) + assert float(row["max_containment_ani"] == 1.0) else: # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) - q1_ani = float(row['query_containment_ani']) - q2_ani = float(row['match_containment_ani']) - avg_ani = float(row['average_containment_ani']) - max_ani = float(row['max_containment_ani']) + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + q1_ani = float(row["query_containment_ani"]) + q2_ani = float(row["match_containment_ani"]) + avg_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) jaccard = round(jaccard, 4) cont = round(cont, 4) @@ -862,9 +910,20 @@ def test_simple_prot(runtmp): q2_ani = round(q2_ani, 4) avg_ani = round(avg_ani, 4) max_ani = round(max_ani, 4) - print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}", intersect_hashes, f"{q1_ani:.04}", f"{q2_ani:.04}", f"{avg_ani:.04}", f"{max_ani:.04}") - - if q == 'GCA_001593925' and m == 'GCA_001593935': + print( + q, + m, + f"{jaccard:.04}", + f"{cont:.04}", + f"{maxcont:.04}", + intersect_hashes, + f"{q1_ani:.04}", + f"{q2_ani:.04}", + f"{avg_ani:.04}", + f"{max_ani:.04}", + ) + + if q == "GCA_001593925" and m == "GCA_001593935": assert jaccard == 0.0434 assert cont == 0.1003 assert maxcont == 0.1003 @@ -874,7 +933,7 @@ def test_simple_prot(runtmp): assert avg_ani == 0.8781 assert max_ani == 0.886 - if q == 'GCA_001593935' and m == 'GCA_001593925': + if q == "GCA_001593935" and m == "GCA_001593925": assert jaccard == 0.0434 assert cont == 0.0712 assert maxcont == 0.1003 @@ -887,45 +946,57 @@ def test_simple_prot(runtmp): def test_simple_dayhoff(runtmp): # test basic execution with dayhoff sigs - sigs = get_test_data('dayhoff.zip') - - output = runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'multisearch', sigs, sigs, - '-o', output, '--moltype', 'dayhoff', - '-k', '19', '--scaled', '100', '--ani') + sigs = get_test_data("dayhoff.zip") + + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", + "multisearch", + sigs, + sigs, + "-o", + output, + "--moltype", + "dayhoff", + "-k", + "19", + "--scaled", + "100", + "--ani", + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 4 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # identical? 
- if row['match_name'] == row['query_name']: - assert row['query_md5'] == row['match_md5'], row - assert float(row['containment'] == 1.0) - assert float(row['jaccard'] == 1.0) - assert float(row['max_containment'] == 1.0) - assert float(row['query_containment_ani'] == 1.0) - assert float(row['match_containment_ani'] == 1.0) - assert float(row['average_containment_ani'] == 1.0) - assert float(row['max_containment_ani'] == 1.0) + if row["match_name"] == row["query_name"]: + assert row["query_md5"] == row["match_md5"], row + assert float(row["containment"] == 1.0) + assert float(row["jaccard"] == 1.0) + assert float(row["max_containment"] == 1.0) + assert float(row["query_containment_ani"] == 1.0) + assert float(row["match_containment_ani"] == 1.0) + assert float(row["average_containment_ani"] == 1.0) + assert float(row["max_containment_ani"] == 1.0) else: # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) - q1_ani = float(row['query_containment_ani']) - q2_ani = float(row['match_containment_ani']) - avg_ani = float(row['average_containment_ani']) - max_ani = float(row['max_containment_ani']) + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + q1_ani = float(row["query_containment_ani"]) + q2_ani = float(row["match_containment_ani"]) + avg_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) jaccard = round(jaccard, 4) cont = round(cont, 4) @@ -934,9 +1005,20 @@ def test_simple_dayhoff(runtmp): q2_ani = round(q2_ani, 4) avg_ani = round(avg_ani, 4) max_ani = round(max_ani, 4) - print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}", intersect_hashes, f"{q1_ani:.04}", f"{q2_ani:.04}", f"{avg_ani:.04}", f"{max_ani:.04}") - - if q == 'GCA_001593925' and m == 'GCA_001593935': + print( + q, + m, + f"{jaccard:.04}", + f"{cont:.04}", + f"{maxcont:.04}", + intersect_hashes, + f"{q1_ani:.04}", + f"{q2_ani:.04}", + f"{avg_ani:.04}", + f"{max_ani:.04}", + ) + + if q == "GCA_001593925" and m == "GCA_001593935": assert jaccard == 0.1326 assert cont == 0.2815 assert maxcont == 0.2815 @@ -946,7 +1028,7 @@ def test_simple_dayhoff(runtmp): assert avg_ani == 0.9272 assert max_ani == 0.9355 - if q == 'GCA_001593935' and m == 'GCA_001593925': + if q == "GCA_001593935" and m == "GCA_001593925": assert jaccard == 0.1326 assert cont == 0.2004 assert maxcont == 0.2815 @@ -959,45 +1041,57 @@ def test_simple_dayhoff(runtmp): def test_simple_hp(runtmp): # test basic execution with hp sigs - sigs = get_test_data('hp.zip') - - output = runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'multisearch', sigs, sigs, - '-o', output, '--moltype', 'hp', - '-k', '19', '--scaled', '100', '--ani') + sigs = get_test_data("hp.zip") + + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", + "multisearch", + sigs, + sigs, + "-o", + output, + "--moltype", + "hp", + "-k", + "19", + "--scaled", + "100", + "--ani", + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 4 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # identical? 
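# (editorial note) Across the three alphabet tests, cross-genome jaccard for
# the same pair rises as the alphabet shrinks: 0.0434 for protein (20
# letters), 0.1326 for dayhoff (6 groups), and 0.4983 below for hp (2 groups,
# hydrophobic/polar). Smaller alphabets make unrelated k-mers collide more
# often, so similarity thresholds are not comparable across moltypes.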
- if row['match_name'] == row['query_name']: - assert row['query_md5'] == row['match_md5'], row - assert float(row['containment'] == 1.0) - assert float(row['jaccard'] == 1.0) - assert float(row['max_containment'] == 1.0) - assert float(row['query_containment_ani'] == 1.0) - assert float(row['match_containment_ani'] == 1.0) - assert float(row['average_containment_ani'] == 1.0) - assert float(row['max_containment_ani'] == 1.0) + if row["match_name"] == row["query_name"]: + assert row["query_md5"] == row["match_md5"], row + assert float(row["containment"] == 1.0) + assert float(row["jaccard"] == 1.0) + assert float(row["max_containment"] == 1.0) + assert float(row["query_containment_ani"] == 1.0) + assert float(row["match_containment_ani"] == 1.0) + assert float(row["average_containment_ani"] == 1.0) + assert float(row["max_containment_ani"] == 1.0) else: # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) - q1_ani = float(row['query_containment_ani']) - q2_ani = float(row['match_containment_ani']) - avg_ani = float(row['average_containment_ani']) - max_ani = float(row['max_containment_ani']) + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + q1_ani = float(row["query_containment_ani"]) + q2_ani = float(row["match_containment_ani"]) + avg_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) jaccard = round(jaccard, 4) cont = round(cont, 4) @@ -1006,9 +1100,20 @@ def test_simple_hp(runtmp): q2_ani = round(q2_ani, 4) avg_ani = round(avg_ani, 4) max_ani = round(max_ani, 4) - print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}", intersect_hashes, f"{q1_ani:.04}", f"{q2_ani:.04}", f"{avg_ani:.04}", f"{max_ani:.04}") - - if q == 'GCA_001593925' and m == 'GCA_001593935': + print( + q, + m, + f"{jaccard:.04}", + f"{cont:.04}", + f"{maxcont:.04}", + intersect_hashes, + f"{q1_ani:.04}", + f"{q2_ani:.04}", + f"{avg_ani:.04}", + f"{max_ani:.04}", + ) + + if q == "GCA_001593925" and m == "GCA_001593935": assert jaccard == 0.4983 assert cont == 0.747 assert maxcont == 0.747 @@ -1018,7 +1123,7 @@ def test_simple_hp(runtmp): assert avg_ani == 0.9791 assert max_ani == 0.9848 - if q == 'GCA_001593935' and m == 'GCA_001593925': + if q == "GCA_001593935" and m == "GCA_001593925": assert jaccard == 0.4983 assert cont == 0.5994 assert maxcont == 0.747 @@ -1031,34 +1136,43 @@ def test_simple_hp(runtmp): def test_simple_below_threshold(runtmp): # test basic execution! 
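# (editorial note) With `--threshold 0.5`, only the three self-matches
# survive: the largest cross-containment among these genomes is ~0.4885,
# just under the cutoff, hence the `len(rows) == 3` check below.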
- query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'multisearch', query_list, against_list, - '-o', output, '--ani', '--threshold', '0.5') + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", + "multisearch", + query_list, + against_list, + "-o", + output, + "--ani", + "--threshold", + "0.5", + ) assert os.path.exists(output) - with open(output, 'r') as csvfile: + with open(output, "r") as csvfile: reader = csv.DictReader(csvfile) rows = list(reader) assert len(rows) == 3 for row in rows: # only identical reported print(row) - assert row['query_md5'] == row['match_md5'] - assert float(row['containment']) == 1.0 - assert float(row['jaccard']) == 1.0 - assert float(row['max_containment']) == 1.0 - assert float(row['query_containment_ani']) == 1.0 - assert float(row['match_containment_ani']) == 1.0 - assert float(row['average_containment_ani']) == 1.0 - assert float(row['max_containment_ani']) == 1.0 + assert row["query_md5"] == row["match_md5"] + assert float(row["containment"]) == 1.0 + assert float(row["jaccard"]) == 1.0 + assert float(row["max_containment"]) == 1.0 + assert float(row["query_containment_ani"]) == 1.0 + assert float(row["match_containment_ani"]) == 1.0 + assert float(row["average_containment_ani"]) == 1.0 + assert float(row["max_containment_ani"]) == 1.0 diff --git a/src/python/tests/test_pairwise.py b/src/python/tests/test_pairwise.py index cba2a297..bd54c5cd 100644 --- a/src/python/tests/test_pairwise.py +++ b/src/python/tests/test_pairwise.py @@ -5,64 +5,67 @@ import sourmash from . import sourmash_tst_utils as utils -from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist, - index_siglist) +from .sourmash_tst_utils import ( + get_test_data, + make_file_list, + zip_siglist, + index_siglist, +) def test_installed(runtmp): with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'pairwise') + runtmp.sourmash("scripts", "pairwise") - assert 'usage: pairwise' in runtmp.last_result.err + assert "usage: pairwise" in runtmp.last_result.err def test_simple_no_ani(runtmp, capfd, zip_query, indexed): # test basic execution! 
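# (editorial note) pairwise takes a single collection and reports each
# unordered pair once, so three sketches give C(3, 2) = 3 rows (the
# `len(df) == 3` below); `-t -1` pushes the containment threshold below its
# [0, 1] range so that no pair is filtered out.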
- query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) if indexed: - query_list = index_siglist(runtmp, query_list, runtmp.output('db')) + query_list = index_siglist(runtmp, query_list, runtmp.output("db")) - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', output, '-t', '-1') + runtmp.sourmash("scripts", "pairwise", query_list, "-o", output, "-t", "-1") assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 3 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) - assert 'query_containment_ani' not in row - assert 'match_containment_ani' not in row - assert 'average_containment_ani' not in row - assert 'max_containment_ani' not in row + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + assert "query_containment_ani" not in row + assert "match_containment_ani" not in row + assert "average_containment_ani" not in row + assert "max_containment_ani" not in row jaccard = round(jaccard, 4) cont = round(cont, 4) maxcont = round(maxcont, 4) print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}") - if q == 'NC_011665.1' and m == 'NC_009661.1': + if q == "NC_011665.1" and m == "NC_009661.1": assert jaccard == 0.3207 assert cont == 0.4828 assert maxcont == 0.4885 @@ -72,46 +75,49 @@ def test_simple_no_ani(runtmp, capfd, zip_query, indexed): print(captured.err) if indexed: - assert "WARNING: loading all sketches from a RocksDB into memory!" in captured.err + assert ( + "WARNING: loading all sketches from a RocksDB into memory!" in captured.err + ) def test_simple_ani(runtmp, zip_query): # test basic execution! 
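# (editorial sketch) The hand-checked ANI values in these tests follow
# sourmash's containment-to-ANI point estimate, roughly ANI = c ** (1 / k);
# for the k=31 sketches used here:
#
#     >>> round(0.4828 ** (1 / 31), 4)   # containment of NC_011665.1 in match
#     0.9768
#     >>> round(0.4885 ** (1 / 31), 4)   # reverse direction; also max_ani
#     0.9772
#     >>> round((0.9768 + 0.9772) / 2, 4)  # average_containment_ani
#     0.977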
- query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', output, '-t', '-1', '--ani') + runtmp.sourmash( + "scripts", "pairwise", query_list, "-o", output, "-t", "-1", "--ani" + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 3 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) - q1_ani = float(row['query_containment_ani']) - q2_ani = float(row['match_containment_ani']) - avg_ani = float(row['average_containment_ani']) - max_ani = float(row['max_containment_ani']) + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + q1_ani = float(row["query_containment_ani"]) + q2_ani = float(row["match_containment_ani"]) + avg_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) jaccard = round(jaccard, 4) cont = round(cont, 4) @@ -120,9 +126,19 @@ def test_simple_ani(runtmp, zip_query): q2_ani = round(q2_ani, 4) avg_ani = round(avg_ani, 4) max_ani = round(max_ani, 4) - print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}", f"{q1_ani:.04}", f"{q2_ani:.04}", f"{avg_ani:.04}", f"{max_ani:.04}") - - if q == 'NC_011665.1' and m == 'NC_009661.1': + print( + q, + m, + f"{jaccard:.04}", + f"{cont:.04}", + f"{maxcont:.04}", + f"{q1_ani:.04}", + f"{q2_ani:.04}", + f"{avg_ani:.04}", + f"{max_ani:.04}", + ) + + if q == "NC_011665.1" and m == "NC_009661.1": assert jaccard == 0.3207 assert cont == 0.4828 assert maxcont == 0.4885 @@ -135,22 +151,20 @@ def test_simple_ani(runtmp, zip_query): def test_simple_threshold(runtmp, zip_query): # test with a simple threshold => only 3 results - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') - + output = runtmp.output("out.csv") if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', output, '-t', '0.1') + runtmp.sourmash("scripts", "pairwise", query_list, "-o", output, "-t", "0.1") assert os.path.exists(output) df = pandas.read_csv(output) @@ -159,22 +173,21 @@ def test_simple_threshold(runtmp, 
zip_query): def test_simple_manifest(runtmp): # test with a simple threshold => only 3 results - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - query_mf = runtmp.output('qmf.csv') + query_mf = runtmp.output("qmf.csv") runtmp.sourmash("sig", "manifest", query_list, "-o", query_mf) - runtmp.sourmash('scripts', 'pairwise', query_mf, - '-o', output, '-t', '0.1') + runtmp.sourmash("scripts", "pairwise", query_mf, "-o", output, "-t", "0.1") assert os.path.exists(output) df = pandas.read_csv(output) @@ -183,13 +196,12 @@ def test_simple_manifest(runtmp): def test_sig_query(runtmp, capfd): # sig query is ok now, but fails bc only one sig - sig2 = get_test_data('2.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'pairwise', sig2, - '-o', output) + runtmp.sourmash("scripts", "pairwise", sig2, "-o", output) captured = capfd.readouterr() print(captured.err) @@ -198,232 +210,240 @@ def test_sig_query(runtmp, capfd): def test_bad_query(runtmp, capfd): # test with a bad query list (a missing file) - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") make_file_list(query_list, [sig2, sig47, "no-exist"]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', output) + runtmp.sourmash("scripts", "pairwise", query_list, "-o", output) captured = capfd.readouterr() print(captured.err) assert "WARNING: could not load sketches from path 'no-exist'" in captured.err - assert "WARNING: 1 analysis paths failed to load. See error messages above." in captured.err + assert ( + "WARNING: 1 analysis paths failed to load. See error messages above." 
+ in captured.err + ) def test_bad_query_2(runtmp, capfd): # test with a bad query (a .sig.gz file renamed as zip file) - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - query_zip = runtmp.output('query.zip') + query_zip = runtmp.output("query.zip") # cp sig2 into query_zip - with open(query_zip, 'wb') as fp: - with open(sig2, 'rb') as fp2: + with open(query_zip, "wb") as fp: + with open(sig2, "rb") as fp2: fp.write(fp2.read()) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'pairwise', query_zip, - '-o', output) + runtmp.sourmash("scripts", "pairwise", query_zip, "-o", output) captured = capfd.readouterr() print(captured.err) - assert 'InvalidArchive' in captured.err + assert "InvalidArchive" in captured.err def test_missing_query(runtmp, capfd, zip_db): # test with a missing query list - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', output) - + runtmp.sourmash("scripts", "pairwise", query_list, "-o", output) + captured = capfd.readouterr() print(captured.err) - assert 'Error: No such file or directory' in captured.err - + assert "Error: No such file or directory" in captured.err def test_empty_query(runtmp, capfd): # test with an empty query list - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, []) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', output) + runtmp.sourmash("scripts", "pairwise", query_list, "-o", output) captured = capfd.readouterr() - assert 'Error: No analysis signatures loaded, exiting.' in captured.err + assert "Error: No analysis signatures loaded, exiting." in captured.err def test_nomatch_query_warn(runtmp, capfd, zip_query): # test a non-matching (diff ksize) in query; do we get warning message? 
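# (editorial note) sig1 is a k=21 sketch while these runs search at the
# default ksize here (31), so its path contributes no compatible signatures;
# the run still succeeds and emits the "skipped 1 analysis paths" warning
# asserted below.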
- query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") - sig1 = get_test_data('1.fa.k21.sig.gz') - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig1 = get_test_data("1.fa.k21.sig.gz") + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63, sig1]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', output) + runtmp.sourmash("scripts", "pairwise", query_list, "-o", output) assert os.path.exists(output) captured = capfd.readouterr() print(captured.err) - assert 'WARNING: skipped 1 analysis paths - no compatible signatures' in captured.err + assert ( + "WARNING: skipped 1 analysis paths - no compatible signatures" in captured.err + ) def test_nomatch_query_exit(runtmp, capfd, zip_query): # test a non-matching (diff ksize) in query; do we get warning message? - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") - sig1 = get_test_data('1.fa.k21.sig.gz') - sig2 = get_test_data('2.fa.k21.sig.gz') + sig1 = get_test_data("1.fa.k21.sig.gz") + sig2 = get_test_data("2.fa.k21.sig.gz") make_file_list(query_list, [sig1, sig2]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', output) + runtmp.sourmash("scripts", "pairwise", query_list, "-o", output) captured = capfd.readouterr() print(captured.err) - assert 'WARNING: skipped 2 analysis paths - no compatible signatures' in captured.err - assert 'Error: No analysis signatures loaded, exiting.' in captured.err + assert ( + "WARNING: skipped 2 analysis paths - no compatible signatures" in captured.err + ) + assert "Error: No analysis signatures loaded, exiting." in captured.err def test_load_only_one_bug(runtmp, capfd, zip_db): # check that we behave properly when presented with multiple query # sketches - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") - sig1_k31 = get_test_data('1.fa.k31.sig.gz') + sig1_k31 = get_test_data("1.fa.k31.sig.gz") # note: this was created as a 3-sketch-in-one-signature directly # via sourmash sketch dna -p k=21,k=31,k=51. - sig1_all = get_test_data('1.combined.sig.gz') + sig1_all = get_test_data("1.combined.sig.gz") make_file_list(query_list, [sig1_all, sig1_k31]) if zip_db: - query_list = zip_siglist(runtmp, query_list, runtmp.output('db.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("db.zip")) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', output) + runtmp.sourmash("scripts", "pairwise", query_list, "-o", output) assert os.path.exists(output) captured = capfd.readouterr() print(captured.err) - assert not 'WARNING: skipped 1 paths - no compatible signatures.' in captured.err - assert not 'WARNING: no compatible sketches in path ' in captured.err + assert not "WARNING: skipped 1 paths - no compatible signatures." 
in captured.err + assert not "WARNING: no compatible sketches in path " in captured.err def test_md5(runtmp, zip_query): # test that md5s match what was in the original files, not downsampled etc. - query_list = runtmp.output('query.txt') + query_list = runtmp.output("query.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) - - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', output, "-t", "-0.1") + runtmp.sourmash("scripts", "pairwise", query_list, "-o", output, "-t", "-0.1") assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 3 - md5s = list(df['query_md5']) + list(df['match_md5']) + md5s = list(df["query_md5"]) + list(df["match_md5"]) print(f"md5s: {md5s}") for query_file in (sig2, sig47, sig63): for ss in sourmash.load_file_as_signatures(query_file, ksize=31): assert ss.md5sum() in md5s - md5s = list(df['match_md5']) + md5s = list(df["match_md5"]) print(md5s) def test_simple_prot_ani(runtmp): # test basic execution with protein sigs - sigs = get_test_data('protein.zip') - - output = runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'pairwise', sigs, - '-o', output, '--moltype', 'protein', - '-k', '19', '--scaled', '100', '--ani') + sigs = get_test_data("protein.zip") + + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", + "pairwise", + sigs, + "-o", + output, + "--moltype", + "protein", + "-k", + "19", + "--scaled", + "100", + "--ani", + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 1 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) - q1_ani = float(row['query_containment_ani']) - q2_ani = float(row['match_containment_ani']) - avg_ani = float(row['average_containment_ani']) - max_ani = float(row['max_containment_ani']) + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + q1_ani = float(row["query_containment_ani"]) + q2_ani = float(row["match_containment_ani"]) + avg_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) jaccard = round(jaccard, 4) cont = round(cont, 4) @@ -432,9 +452,20 @@ def test_simple_prot_ani(runtmp): q2_ani = round(q2_ani, 4) avg_ani = round(avg_ani, 4) max_ani = round(max_ani, 4) - print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}", intersect_hashes, f"{q1_ani:.04}", f"{q2_ani:.04}", f"{avg_ani:.04}", f"{max_ani:.04}") - - if q == 'GCA_001593925' and m == 'GCA_001593935': + print( + q, + m, + f"{jaccard:.04}", + f"{cont:.04}", + f"{maxcont:.04}", + intersect_hashes, + f"{q1_ani:.04}", + f"{q2_ani:.04}", + f"{avg_ani:.04}", + f"{max_ani:.04}", + ) + + if q == 
"GCA_001593925" and m == "GCA_001593935": assert jaccard == 0.0434 assert cont == 0.1003 assert maxcont == 0.1003 @@ -447,33 +478,44 @@ def test_simple_prot_ani(runtmp): def test_simple_dayhoff_ani(runtmp): # test basic execution with dayhoff sigs - sigs = get_test_data('dayhoff.zip') - - output = runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'pairwise', sigs, - '-o', output, '--moltype', 'dayhoff', - '-k', '19', '--scaled', '100', '--ani') + sigs = get_test_data("dayhoff.zip") + + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", + "pairwise", + sigs, + "-o", + output, + "--moltype", + "dayhoff", + "-k", + "19", + "--scaled", + "100", + "--ani", + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 1 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) - q1_ani = float(row['query_containment_ani']) - q2_ani = float(row['match_containment_ani']) - avg_ani = float(row['average_containment_ani']) - max_ani = float(row['max_containment_ani']) + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + q1_ani = float(row["query_containment_ani"]) + q2_ani = float(row["match_containment_ani"]) + avg_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) jaccard = round(jaccard, 4) cont = round(cont, 4) @@ -482,9 +524,20 @@ def test_simple_dayhoff_ani(runtmp): q2_ani = round(q2_ani, 4) avg_ani = round(avg_ani, 4) max_ani = round(max_ani, 4) - print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}", intersect_hashes, f"{q1_ani:.04}", f"{q2_ani:.04}", f"{avg_ani:.04}", f"{max_ani:.04}") - - if q == 'GCA_001593925' and m == 'GCA_001593935': + print( + q, + m, + f"{jaccard:.04}", + f"{cont:.04}", + f"{maxcont:.04}", + intersect_hashes, + f"{q1_ani:.04}", + f"{q2_ani:.04}", + f"{avg_ani:.04}", + f"{max_ani:.04}", + ) + + if q == "GCA_001593925" and m == "GCA_001593935": assert jaccard == 0.1326 assert cont == 0.2815 assert maxcont == 0.2815 @@ -497,33 +550,44 @@ def test_simple_dayhoff_ani(runtmp): def test_simple_hp_ani(runtmp): # test basic execution with hp sigs - sigs = get_test_data('hp.zip') - - output = runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'pairwise', sigs, - '-o', output, '--moltype', 'hp', - '-k', '19', '--scaled', '100', '--ani') + sigs = get_test_data("hp.zip") + + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", + "pairwise", + sigs, + "-o", + output, + "--moltype", + "hp", + "-k", + "19", + "--scaled", + "100", + "--ani", + ) assert os.path.exists(output) df = pandas.read_csv(output) assert len(df) == 1 - dd = df.to_dict(orient='index') + dd = df.to_dict(orient="index") print(dd) for idx, row in dd.items(): # confirm hand-checked numbers - q = row['query_name'].split()[0] - m = row['match_name'].split()[0] - cont = float(row['containment']) - jaccard = float(row['jaccard']) - maxcont = float(row['max_containment']) - intersect_hashes = int(row['intersect_hashes']) - q1_ani = float(row['query_containment_ani']) - q2_ani = float(row['match_containment_ani']) - avg_ani = 
float(row['average_containment_ani']) - max_ani = float(row['max_containment_ani']) + q = row["query_name"].split()[0] + m = row["match_name"].split()[0] + cont = float(row["containment"]) + jaccard = float(row["jaccard"]) + maxcont = float(row["max_containment"]) + intersect_hashes = int(row["intersect_hashes"]) + q1_ani = float(row["query_containment_ani"]) + q2_ani = float(row["match_containment_ani"]) + avg_ani = float(row["average_containment_ani"]) + max_ani = float(row["max_containment_ani"]) jaccard = round(jaccard, 4) cont = round(cont, 4) @@ -532,9 +596,20 @@ def test_simple_hp_ani(runtmp): q2_ani = round(q2_ani, 4) avg_ani = round(avg_ani, 4) max_ani = round(max_ani, 4) - print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}", intersect_hashes, f"{q1_ani:.04}", f"{q2_ani:.04}", f"{avg_ani:.04}", f"{max_ani:.04}") - - if q == 'GCA_001593925' and m == 'GCA_001593935': + print( + q, + m, + f"{jaccard:.04}", + f"{cont:.04}", + f"{maxcont:.04}", + intersect_hashes, + f"{q1_ani:.04}", + f"{q2_ani:.04}", + f"{avg_ani:.04}", + f"{max_ani:.04}", + ) + + if q == "GCA_001593925" and m == "GCA_001593935": assert jaccard == 0.4983 assert cont == 0.747 assert maxcont == 0.747 @@ -547,22 +622,23 @@ def test_simple_hp_ani(runtmp): def test_simple_below_threshold(runtmp): # test basic execution! - query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') + output = runtmp.output("out.csv") - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', output, '--ani', '--threshold', '0.5') + runtmp.sourmash( + "scripts", "pairwise", query_list, "-o", output, "--ani", "--threshold", "0.5" + ) assert os.path.exists(output) - with open(output, 'r') as csvfile: + with open(output, "r") as csvfile: reader = csv.reader(csvfile) rows = list(reader) print(rows) @@ -571,69 +647,84 @@ def test_simple_below_threshold(runtmp): def test_simple_below_threshold_write_all(runtmp): # test basic execution! 
- query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', output, '--ani', '--threshold', '0.5', - '--write-all') + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", + "pairwise", + query_list, + "-o", + output, + "--ani", + "--threshold", + "0.5", + "--write-all", + ) assert os.path.exists(output) - with open(output, 'r') as csvfile: + with open(output, "r") as csvfile: reader = csv.DictReader(csvfile) rows = list(reader) print(rows) assert len(rows) == 3 for row in rows: - assert float(row['query_containment_ani']) == 1.0 - assert float(row['match_containment_ani']) == 1.0 - assert float(row['average_containment_ani']) == 1.0 - assert float(row['max_containment_ani']) == 1.0 - assert float(row['containment']) == 1.0 - assert float(row['max_containment']) == 1.0 - assert float(row['jaccard']) == 1.0 - assert row['query_name'] == row['match_name'] - assert row['query_md5'] == row['match_md5'] + assert float(row["query_containment_ani"]) == 1.0 + assert float(row["match_containment_ani"]) == 1.0 + assert float(row["average_containment_ani"]) == 1.0 + assert float(row["max_containment_ani"]) == 1.0 + assert float(row["containment"]) == 1.0 + assert float(row["max_containment"]) == 1.0 + assert float(row["jaccard"]) == 1.0 + assert row["query_name"] == row["match_name"] + assert row["query_md5"] == row["match_md5"] def test_simple_below_threshold_write_all_no_ani(runtmp): # test basic execution! 
- query_list = runtmp.output('query.txt') - against_list = runtmp.output('against.txt') + query_list = runtmp.output("query.txt") + against_list = runtmp.output("against.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_file_list(query_list, [sig2, sig47, sig63]) - output = runtmp.output('out.csv') - - runtmp.sourmash('scripts', 'pairwise', query_list, - '-o', output, '--threshold', '0.5', - '--write-all') + output = runtmp.output("out.csv") + + runtmp.sourmash( + "scripts", + "pairwise", + query_list, + "-o", + output, + "--threshold", + "0.5", + "--write-all", + ) assert os.path.exists(output) - with open(output, 'r') as csvfile: + with open(output, "r") as csvfile: reader = csv.DictReader(csvfile) rows = list(reader) print(rows) assert len(rows) == 3 for row in rows: - assert 'query_containment_ani' not in row.keys() - assert 'match_containment_ani' not in row.keys() - assert 'average_containment_ani' not in row.keys() - assert 'max_containment_ani' not in row.keys() - assert float(row['containment']) == 1.0 - assert float(row['max_containment']) == 1.0 - assert float(row['jaccard']) == 1.0 - assert row['query_name'] == row['match_name'] - assert row['query_md5'] == row['match_md5'] + assert "query_containment_ani" not in row.keys() + assert "match_containment_ani" not in row.keys() + assert "average_containment_ani" not in row.keys() + assert "max_containment_ani" not in row.keys() + assert float(row["containment"]) == 1.0 + assert float(row["max_containment"]) == 1.0 + assert float(row["jaccard"]) == 1.0 + assert row["query_name"] == row["match_name"] + assert row["query_md5"] == row["match_md5"] diff --git a/src/python/tests/test_sketch.py b/src/python/tests/test_sketch.py index 98f08058..3c610a56 100644 --- a/src/python/tests/test_sketch.py +++ b/src/python/tests/test_sketch.py @@ -9,55 +9,65 @@ def get_test_data(filename): thisdir = os.path.dirname(__file__) - return os.path.join(thisdir, 'test-data', filename) + return os.path.join(thisdir, "test-data", filename) -def make_assembly_csv(filename, genome_paths, protein_paths = []): +def make_assembly_csv(filename, genome_paths, protein_paths=[]): # equalize path lengths by adding "". 
-    names = [os.path.basename(x).split('.fa')[0] for x in genome_paths]
+    names = [os.path.basename(x).split(".fa")[0] for x in genome_paths]
     if len(protein_paths) < len(genome_paths):
-        protein_paths.extend(["" for _ in range(len(genome_paths) - len(protein_paths))])
+        protein_paths = protein_paths + [
+            "" for _ in range(len(genome_paths) - len(protein_paths))
+        ]  # build a new list; .extend() mutated the shared default argument
     elif len(genome_paths) < len(protein_paths):
         genome_paths.extend(["" for _ in range(len(protein_paths) - len(genome_paths))])
-        names = [os.path.basename(x).split('.fa')[0] for x in protein_paths]
+        names = [os.path.basename(x).split(".fa")[0] for x in protein_paths]
 
-    with open(filename, 'wt') as fp:
+    with open(filename, "wt") as fp:
         fp.write("name,genome_filename,protein_filename\n")
         for name, genome_path, protein_path in zip(names, genome_paths, protein_paths):
             fp.write("{},{},{}\n".format(name, genome_path, protein_path))
 
-def make_reads_csv(filename, reads_tuples = []):
+
+def make_reads_csv(filename, reads_tuples=[]):
     # reads tuples should be (name,read1,read2)
-    with open(filename, 'wt') as fp:
+    with open(filename, "wt") as fp:
         fp.write("name,read1,read2\n")
-        for (name, read1, read2) in reads_tuples:
+        for name, read1, read2 in reads_tuples:
             print(f"{name},{read1},{read2}")
             fp.write("{},{},{}\n".format(name, read1, read2))
 
 
 def test_installed(runtmp):
     with pytest.raises(utils.SourmashCommandFailed):
-        runtmp.sourmash('scripts', 'manysketch')
+        runtmp.sourmash("scripts", "manysketch")
 
-    assert 'usage: manysketch' in runtmp.last_result.err
+    assert "usage: manysketch" in runtmp.last_result.err
 
 
 def test_manysketch_simple(runtmp):
-    fa_csv = runtmp.output('db-fa.txt')
+    fa_csv = runtmp.output("db-fa.txt")
 
-    fa1 = get_test_data('short.fa')
-    fa2 = get_test_data('short2.fa')
-    fa3 = get_test_data('short3.fa')
+    fa1 = get_test_data("short.fa")
+    fa2 = get_test_data("short2.fa")
+    fa3 = get_test_data("short3.fa")
 
     make_assembly_csv(fa_csv, [fa1, fa2, fa3])
 
-    output = runtmp.output('db.zip')
+    output = runtmp.output("db.zip")
 
-    runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output,
-                    '--param-str', "dna,k=31,scaled=1")
+    runtmp.sourmash(
+        "scripts",
+        "manysketch",
+        fa_csv,
+        "-o",
+        output,
+        "--param-str",
+        "dna,k=31,scaled=1",
+    )
 
     assert os.path.exists(output)
-    assert not runtmp.last_result.out # stdout should be empty
+    assert not runtmp.last_result.out  # stdout should be empty
 
     idx = sourmash.load_file_as_index(output)
     sigs = list(idx.signatures())
@@ -67,21 +77,28 @@ def test_manysketch_simple(runtmp):
 
 
 def test_manysketch_mult_k(runtmp):
-    fa_csv = runtmp.output('db-fa.txt')
+    fa_csv = runtmp.output("db-fa.txt")
 
-    fa1 = get_test_data('short.fa')
-    fa2 = get_test_data('short2.fa')
-    fa3 = get_test_data('short3.fa')
+    fa1 = get_test_data("short.fa")
+    fa2 = get_test_data("short2.fa")
+    fa3 = get_test_data("short3.fa")
 
     make_assembly_csv(fa_csv, [fa1, fa2, fa3])
 
-    output = runtmp.output('db.zip')
+    output = runtmp.output("db.zip")
 
-    runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output,
-                    '--param-str', "dna,k=21,k=31,scaled=1")
+    runtmp.sourmash(
+        "scripts",
+        "manysketch",
+        fa_csv,
+        "-o",
+        output,
+        "--param-str",
+        "dna,k=21,k=31,scaled=1",
+    )
 
     assert os.path.exists(output)
-    assert not runtmp.last_result.out # stdout should be empty
+    assert not runtmp.last_result.out  # stdout should be empty
 
     idx = sourmash.load_file_as_index(output)
     sigs = list(idx.signatures())
@@ -91,29 +108,38 @@ def test_manysketch_mult_k(runtmp):
     names = [sig.name for sig in sigs]
     print(names)
 
-    assert names.count('short') == 2
-    assert 
names.count('short2') == 2 - assert names.count('short3') == 2 + assert names.count("short") == 2 + assert names.count("short2") == 2 + assert names.count("short3") == 2 def test_manysketch_mult_k_2(runtmp): - fa_csv = runtmp.output('db-fa.txt') + fa_csv = runtmp.output("db-fa.txt") - fa1 = get_test_data('short.fa') - fa2 = get_test_data('short2.fa') - fa3 = get_test_data('short3.fa') + fa1 = get_test_data("short.fa") + fa2 = get_test_data("short2.fa") + fa3 = get_test_data("short3.fa") make_assembly_csv(fa_csv, [fa1, fa2, fa3]) - output = runtmp.output('db.zip') - - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "dna,k=21,scaled=1", - '--param-str', "dna,k=31,scaled=1", - '--param-str', "dna,k=21,scaled=1") + output = runtmp.output("db.zip") + + runtmp.sourmash( + "scripts", + "manysketch", + fa_csv, + "-o", + output, + "--param-str", + "dna,k=21,scaled=1", + "--param-str", + "dna,k=31,scaled=1", + "--param-str", + "dna,k=21,scaled=1", + ) assert os.path.exists(output) - assert not runtmp.last_result.out # stdout should be empty + assert not runtmp.last_result.out # stdout should be empty idx = sourmash.load_file_as_index(output) sigs = list(idx.signatures()) @@ -123,29 +149,37 @@ def test_manysketch_mult_k_2(runtmp): names = [sig.name for sig in sigs] print(names) - assert names.count('short') == 2 - assert names.count('short2') == 2 - assert names.count('short3') == 2 + assert names.count("short") == 2 + assert names.count("short2") == 2 + assert names.count("short3") == 2 def test_manysketch_mult_moltype(runtmp): - fa_csv = runtmp.output('db-fa.csv') + fa_csv = runtmp.output("db-fa.csv") - fa1 = get_test_data('short.fa') - fa2 = get_test_data('short2.fa') - fa3 = get_test_data('short3.fa') - protfa1 = get_test_data('short-protein.fa') + fa1 = get_test_data("short.fa") + fa2 = get_test_data("short2.fa") + fa3 = get_test_data("short3.fa") + protfa1 = get_test_data("short-protein.fa") make_assembly_csv(fa_csv, [fa1, fa2, fa3], [protfa1]) - output = runtmp.output('db.zip') + output = runtmp.output("db.zip") - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "dna,k=21,scaled=1", - '--param-str', "protein,k=10,scaled=1") + runtmp.sourmash( + "scripts", + "manysketch", + fa_csv, + "-o", + output, + "--param-str", + "dna,k=21,scaled=1", + "--param-str", + "protein,k=10,scaled=1", + ) assert os.path.exists(output) - assert not runtmp.last_result.out # stdout should be empty + assert not runtmp.last_result.out # stdout should be empty idx = sourmash.load_file_as_index(output) sigs = list(idx.signatures()) @@ -154,39 +188,50 @@ def test_manysketch_mult_moltype(runtmp): assert len(sigs) == 4 # check moltypes, etc! 
for sig in sigs: - if sig.name == 'short': + if sig.name == "short": if sig.minhash.is_dna: assert sig.minhash.ksize == 21 assert sig.minhash.scaled == 1 assert sig.md5sum() == "1474578c5c46dd09da4c2df29cf86621" else: - assert sig.name == 'short' + assert sig.name == "short" assert sig.minhash.ksize == 10 assert sig.minhash.scaled == 1 assert sig.md5sum() == "eb4467d11e0ecd2dbde4193bfc255310" else: - assert sig.name in ['short', 'short2', 'short3'] + assert sig.name in ["short", "short2", "short3"] assert sig.minhash.ksize == 21 assert sig.minhash.scaled == 1 assert sig.minhash.is_dna - assert sig.md5sum() in ["4efeebd26644278e36b9553e018a851a","f85747ac4f473c4a71c1740d009f512b"] + assert sig.md5sum() in [ + "4efeebd26644278e36b9553e018a851a", + "f85747ac4f473c4a71c1740d009f512b", + ] def test_manysketch_mult_moltype_protein(runtmp): - fa_csv = runtmp.output('db-fa.csv') + fa_csv = runtmp.output("db-fa.csv") - protfa1 = get_test_data('short-protein.fa') + protfa1 = get_test_data("short-protein.fa") make_assembly_csv(fa_csv, [], [protfa1]) - output = runtmp.output('db.zip') + output = runtmp.output("db.zip") - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "dayhoff,k=10,scaled=1", - '--param-str', "hp,k=24,scaled=1") + runtmp.sourmash( + "scripts", + "manysketch", + fa_csv, + "-o", + output, + "--param-str", + "dayhoff,k=10,scaled=1", + "--param-str", + "hp,k=24,scaled=1", + ) assert os.path.exists(output) - assert not runtmp.last_result.out # stdout should be empty + assert not runtmp.last_result.out # stdout should be empty idx = sourmash.load_file_as_index(output) sigs = list(idx.signatures()) @@ -200,57 +245,74 @@ def test_manysketch_mult_moltype_protein(runtmp): assert sig.name == "short-protein" if sig.minhash.dayhoff: assert sig.md5sum() == "320464775fe704d9f938a8c63d8dd722" - total_checked+=1 + total_checked += 1 elif sig.minhash.hp: assert sig.md5sum() == "e8ccc6ca7ad560072f51be631d1c39c0" - total_checked+=1 + total_checked += 1 assert total_checked == 2 def test_manysketch_only_incompatible_fastas(runtmp, capfd): # provide dna, protein fastas, but only sketch protein (skip protein fastas!) - fa_csv = runtmp.output('db-fa.csv') + fa_csv = runtmp.output("db-fa.csv") - fa1 = get_test_data('short.fa') - fa2 = get_test_data('short2.fa') - fa3 = get_test_data('short3.fa') + fa1 = get_test_data("short.fa") + fa2 = get_test_data("short2.fa") + fa3 = get_test_data("short3.fa") make_assembly_csv(fa_csv, [fa1, fa2, fa3]) - output = runtmp.output('db.zip') + output = runtmp.output("db.zip") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "protein,k=10,scaled=1") - - assert os.path.exists(output) # output will still exist - is this desired? - assert not runtmp.last_result.out # stdout should be empty + runtmp.sourmash( + "scripts", + "manysketch", + fa_csv, + "-o", + output, + "--param-str", + "protein,k=10,scaled=1", + ) + + assert os.path.exists(output) # output will still exist - is this desired? + assert not runtmp.last_result.out # stdout should be empty captured = capfd.readouterr() print(captured.err) - assert 'DONE. Processed 3 fasta files' in captured.err - assert 'Error: No fasta files compatible with provided sketch parameters: no signatures created.' in captured.err + assert "DONE. Processed 3 fasta files" in captured.err + assert ( + "Error: No fasta files compatible with provided sketch parameters: no signatures created." 
+ in captured.err + ) def test_manysketch_skip_incompatible_fastas(runtmp, capfd): # provide dna, protein fastas, but only sketch protein (skip protein fastas!) - fa_csv = runtmp.output('db-fa.csv') + fa_csv = runtmp.output("db-fa.csv") - fa1 = get_test_data('short.fa') - fa2 = get_test_data('short2.fa') - fa3 = get_test_data('short3.fa') - protfa1 = get_test_data('short-protein.fa') + fa1 = get_test_data("short.fa") + fa2 = get_test_data("short2.fa") + fa3 = get_test_data("short3.fa") + protfa1 = get_test_data("short-protein.fa") make_assembly_csv(fa_csv, [fa1, fa2, fa3], [protfa1]) - output = runtmp.output('db.zip') + output = runtmp.output("db.zip") - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "protein,k=10,scaled=1") + runtmp.sourmash( + "scripts", + "manysketch", + fa_csv, + "-o", + output, + "--param-str", + "protein,k=10,scaled=1", + ) assert os.path.exists(output) - assert not runtmp.last_result.out # stdout should be empty + assert not runtmp.last_result.out # stdout should be empty idx = sourmash.load_file_as_index(output) sigs = list(idx.signatures()) @@ -265,19 +327,18 @@ def test_manysketch_skip_incompatible_fastas(runtmp, capfd): assert sig.minhash.ksize == 10 assert sig.minhash.scaled == 1 assert sig.md5sum() == "eb4467d11e0ecd2dbde4193bfc255310" - assert 'DONE. Processed 4 fasta files' in captured.err - assert 'WARNING: 3 fasta files skipped - no compatible signatures.' in captured.err + assert "DONE. Processed 4 fasta files" in captured.err + assert "WARNING: 3 fasta files skipped - no compatible signatures." in captured.err def test_manysketch_missing_fa_csv(runtmp, capfd): # test missing fa_csv file - fa_csv = runtmp.output('fa_csv.txt') - output = runtmp.output('out.zip') + fa_csv = runtmp.output("fa_csv.txt") + output = runtmp.output("out.zip") # make_file_list(fa_csv, []) # don't make fa_csv file with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysketch', fa_csv, - '-o', output) + runtmp.sourmash("scripts", "manysketch", fa_csv, "-o", output) captured = capfd.readouterr() print(captured.err) @@ -286,18 +347,18 @@ def test_manysketch_missing_fa_csv(runtmp, capfd): def test_manysketch_bad_fa_csv(runtmp, capfd): # siglist instead of fastalist - siglist = runtmp.output('db-sigs.txt') + siglist = runtmp.output("db-sigs.txt") - sig2 = get_test_data('2.fa.sig.gz') - sig47 = get_test_data('47.fa.sig.gz') - sig63 = get_test_data('63.fa.sig.gz') + sig2 = get_test_data("2.fa.sig.gz") + sig47 = get_test_data("47.fa.sig.gz") + sig63 = get_test_data("63.fa.sig.gz") make_assembly_csv(siglist, [sig2, sig47, sig63]) - output = runtmp.output('db.zip') + output = runtmp.output("db.zip") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysketch', siglist, '-o', output) + runtmp.sourmash("scripts", "manysketch", siglist, "-o", output) captured = capfd.readouterr() print(captured.err) @@ -306,15 +367,15 @@ def test_manysketch_bad_fa_csv(runtmp, capfd): def test_manysketch_bad_fa_csv_2(runtmp, capfd): # bad file within filelist - siglist = runtmp.output('bad.txt') + siglist = runtmp.output("bad.txt") # fa_file = runtmp.output("bad.fa") make_assembly_csv(siglist, ["bad2.fa"]) - output = runtmp.output('db.zip') + output = runtmp.output("db.zip") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysketch', siglist, '-o', output) + runtmp.sourmash("scripts", "manysketch", siglist, "-o", output) captured = capfd.readouterr() print(captured.err) @@ -324,13 +385,12 @@ 
def test_manysketch_bad_fa_csv_2(runtmp, capfd): def test_manysketch_bad_fa_csv_3(runtmp, capfd): # test sketch with fasta provided instead of fa_csv - output = runtmp.output('out.zip') - fa1 = get_test_data('short.fa') + output = runtmp.output("out.zip") + fa1 = get_test_data("short.fa") print(fa1) with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysketch', fa1, - '-o', output) + runtmp.sourmash("scripts", "manysketch", fa1, "-o", output) captured = capfd.readouterr() print(captured.err) @@ -340,89 +400,98 @@ def test_manysketch_bad_fa_csv_3(runtmp, capfd): def test_manysketch_bad_fa_csv_4(runtmp, capfd): # test sketch with improperly formatted fa_csv - fa_csv = runtmp.output('db-fa.csv') + fa_csv = runtmp.output("db-fa.csv") - fa1 = get_test_data('short.fa') - fa2 = get_test_data('short2.fa') - fa3 = get_test_data('short3.fa') - protfa1 = get_test_data('short-protein.fa') + fa1 = get_test_data("short.fa") + fa2 = get_test_data("short2.fa") + fa3 = get_test_data("short3.fa") + protfa1 = get_test_data("short-protein.fa") # make file csv but don't fill empty protein rows with ,"" make_assembly_csv(fa_csv, [fa1, fa2, fa3], [protfa1]) g_fa = [fa1, fa2, fa3] p_fa = [protfa1] - with open(fa_csv, 'wt') as fp: + with open(fa_csv, "wt") as fp: fp.write("name,genome_filename,protein_filename\n") for i, g in enumerate(g_fa): - name = os.path.basename(g).split('.fa')[0] + name = os.path.basename(g).split(".fa")[0] if i < len(p_fa): p = p_fa[i] fp.write("{},{},{}\n".format(name, g, p)) else: - fp.write("{},{}\n".format(name, g)) # missing prot path, no trailing comma + fp.write( + "{},{}\n".format(name, g) + ) # missing prot path, no trailing comma - output = runtmp.output('db.zip') + output = runtmp.output("db.zip") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysketch', fa_csv, - '-o', output) + runtmp.sourmash("scripts", "manysketch", fa_csv, "-o", output) captured = capfd.readouterr() print(captured.err) - assert 'found record with 2 fields' in captured.err + assert "found record with 2 fields" in captured.err assert "Could not load fromfile csv" in captured.err def test_manysketch_bad_param_str_moltype(runtmp, capfd): # no moltype provided in param str - fa_csv = runtmp.output('db-fa.txt') + fa_csv = runtmp.output("db-fa.txt") - fa1 = get_test_data('short.fa') - fa2 = get_test_data('short2.fa') - fa3 = get_test_data('short3.fa') + fa1 = get_test_data("short.fa") + fa2 = get_test_data("short2.fa") + fa3 = get_test_data("short3.fa") make_assembly_csv(fa_csv, [fa1, fa2, fa3]) - output = runtmp.output('out.zip') + output = runtmp.output("out.zip") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysketch', fa_csv, - '-o', output, '-p', 'k=31,scaled=100') + runtmp.sourmash( + "scripts", "manysketch", fa_csv, "-o", output, "-p", "k=31,scaled=100" + ) captured = capfd.readouterr() print(captured.err) - assert "Error parsing params string: No moltype provided in params string k=31,scaled=100" in captured.err + assert ( + "Error parsing params string: No moltype provided in params string k=31,scaled=100" + in captured.err + ) assert "Failed to parse params string" in captured.err def test_manysketch_bad_param_str_ksize(runtmp, capfd): # no ksize provided in param str - fa_csv = runtmp.output('db-fa.txt') + fa_csv = runtmp.output("db-fa.txt") - fa1 = get_test_data('short.fa') - fa2 = get_test_data('short2.fa') - fa3 = get_test_data('short3.fa') + fa1 = get_test_data("short.fa") + fa2 = 
get_test_data("short2.fa") + fa3 = get_test_data("short3.fa") make_assembly_csv(fa_csv, [fa1, fa2, fa3]) - output = runtmp.output('out.zip') + output = runtmp.output("out.zip") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysketch', fa_csv, - '-o', output, '-p', 'dna,scaled=100') + runtmp.sourmash( + "scripts", "manysketch", fa_csv, "-o", output, "-p", "dna,scaled=100" + ) captured = capfd.readouterr() print(captured.err) - assert "Error parsing params string: No ksizes provided in params string dna,scaled=100" in captured.err + assert ( + "Error parsing params string: No ksizes provided in params string dna,scaled=100" + in captured.err + ) assert "Failed to parse params string" in captured.err + def test_manysketch_empty_fa_csv(runtmp, capfd): # test empty fa_csv file - fa_csv = runtmp.output('fa.txt') - output = runtmp.output('out.zip') - make_assembly_csv(fa_csv, []) # empty + fa_csv = runtmp.output("fa.txt") + output = runtmp.output("out.zip") + make_assembly_csv(fa_csv, []) # empty with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysketch', fa_csv, - '-o', output) + runtmp.sourmash("scripts", "manysketch", fa_csv, "-o", output) captured = capfd.readouterr() print(captured.err) @@ -430,23 +499,31 @@ def test_manysketch_empty_fa_csv(runtmp, capfd): def test_manysketch_duplicated_rows(runtmp, capfd): - fa_csv = runtmp.output('db-fa.csv') + fa_csv = runtmp.output("db-fa.csv") - fa1 = get_test_data('short.fa') - fa2 = get_test_data('short2.fa') - fa3 = get_test_data('short3.fa') - protfa1 = get_test_data('short-protein.fa') + fa1 = get_test_data("short.fa") + fa2 = get_test_data("short2.fa") + fa3 = get_test_data("short3.fa") + protfa1 = get_test_data("short-protein.fa") make_assembly_csv(fa_csv, [fa1, fa1, fa1, fa3]) - output = runtmp.output('db.zip') + output = runtmp.output("db.zip") - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "dna,k=21,scaled=1", - '--param-str', "protein,k=10,scaled=1") + runtmp.sourmash( + "scripts", + "manysketch", + fa_csv, + "-o", + output, + "--param-str", + "dna,k=21,scaled=1", + "--param-str", + "protein,k=10,scaled=1", + ) assert os.path.exists(output) - assert not runtmp.last_result.out # stdout should be empty + assert not runtmp.last_result.out # stdout should be empty idx = sourmash.load_file_as_index(output) sigs = list(idx.signatures()) @@ -460,21 +537,22 @@ def test_manysketch_duplicated_rows(runtmp, capfd): def test_manysketch_N_in_dna(runtmp): # make sure we can handle Ns in DNA sequences - fa_csv = runtmp.output('db-fa.txt') - fa1 = runtmp.output('bad.fa') - with open (fa1, 'wt') as fp: + fa_csv = runtmp.output("db-fa.txt") + fa1 = runtmp.output("bad.fa") + with open(fa1, "wt") as fp: fp.write(">bad\n") fp.write("ACAGTN\n") make_assembly_csv(fa_csv, [fa1]) - output = runtmp.output('db.zip') + output = runtmp.output("db.zip") - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "dna,k=4,scaled=1") + runtmp.sourmash( + "scripts", "manysketch", fa_csv, "-o", output, "--param-str", "dna,k=4,scaled=1" + ) assert os.path.exists(output) - assert not runtmp.last_result.out # stdout should be empty + assert not runtmp.last_result.out # stdout should be empty idx = sourmash.load_file_as_index(output) sigs = list(idx.signatures()) @@ -485,23 +563,30 @@ def test_manysketch_N_in_dna(runtmp): def test_zip_manifest(runtmp, capfd): # test basic manifest-generating functionality. 
- fa_csv = runtmp.output('db-fa.txt') + fa_csv = runtmp.output("db-fa.txt") - fa1 = get_test_data('short.fa') - fa2 = get_test_data('short2.fa') - fa3 = get_test_data('short3.fa') + fa1 = get_test_data("short.fa") + fa2 = get_test_data("short2.fa") + fa3 = get_test_data("short3.fa") make_assembly_csv(fa_csv, [fa1, fa2, fa3]) - output = runtmp.output('db.zip') - - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "dna,k=31,scaled=1") + output = runtmp.output("db.zip") + + runtmp.sourmash( + "scripts", + "manysketch", + fa_csv, + "-o", + output, + "--param-str", + "dna,k=31,scaled=1", + ) loader = sourmash.load_file_as_index(output) rows = [] siglist = [] - for (sig, loc) in loader._signatures_with_internal(): + for sig, loc in loader._signatures_with_internal(): row = index.CollectionManifest.make_manifest_row(sig, loc) rows.append(row) siglist.append(sig) @@ -511,37 +596,44 @@ def test_zip_manifest(runtmp, capfd): assert len(manifest) == len(rows) assert len(manifest) == 3 - md5_list = [ row['md5'] for row in manifest.rows ] - assert '9191284a3a23a913d8d410f3d53ce8f0' in md5_list - assert 'd663bb55b2a0f8782c53c8af89f20fff' in md5_list - assert 'bf752903d635b1eb83c53fe4aae951db' in md5_list + md5_list = [row["md5"] for row in manifest.rows] + assert "9191284a3a23a913d8d410f3d53ce8f0" in md5_list + assert "d663bb55b2a0f8782c53c8af89f20fff" in md5_list + assert "bf752903d635b1eb83c53fe4aae951db" in md5_list for sig in siglist: assert sig in manifest assert sig.minhash.ksize == 31 - assert sig.minhash.moltype == 'DNA' + assert sig.minhash.moltype == "DNA" assert sig.minhash.scaled == 1 def test_protein_zip_manifest(runtmp, capfd): # test basic manifest-generating functionality. - fa_csv = runtmp.output('db-fa.csv') + fa_csv = runtmp.output("db-fa.csv") - fa1 = get_test_data('short.fa') - fa2 = get_test_data('short-protein.fa') + fa1 = get_test_data("short.fa") + fa2 = get_test_data("short-protein.fa") make_assembly_csv(fa_csv, [fa1], [fa2]) - output = runtmp.output('db.zip') - - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "protein,k=10,scaled=1") + output = runtmp.output("db.zip") + + runtmp.sourmash( + "scripts", + "manysketch", + fa_csv, + "-o", + output, + "--param-str", + "protein,k=10,scaled=1", + ) loader = sourmash.load_file_as_index(output) rows = [] siglist = [] # make manifest via sourmash python code - for (sig, loc) in loader._signatures_with_internal(): + for sig, loc in loader._signatures_with_internal(): row = index.CollectionManifest.make_manifest_row(sig, loc) rows.append(row) siglist.append(sig) @@ -551,76 +643,103 @@ def test_protein_zip_manifest(runtmp, capfd): assert len(manifest) == len(rows) assert len(manifest) == 1 - md5_list = [ row['md5'] for row in manifest.rows ] - assert 'eb4467d11e0ecd2dbde4193bfc255310' in md5_list - ksize_list = [ row['ksize'] for row in manifest.rows ] - assert 10 in ksize_list # manifest ksizes are human-readable (k, not k*3) - scaled_list = [ row['scaled'] for row in manifest.rows ] + md5_list = [row["md5"] for row in manifest.rows] + assert "eb4467d11e0ecd2dbde4193bfc255310" in md5_list + ksize_list = [row["ksize"] for row in manifest.rows] + assert 10 in ksize_list # manifest ksizes are human-readable (k, not k*3) + scaled_list = [row["scaled"] for row in manifest.rows] assert 1 in scaled_list - moltype_list = [ row['moltype'] for row in manifest.rows ] + moltype_list = [row["moltype"] for row in manifest.rows] assert "protein" in moltype_list for sig in siglist: assert sig 
in manifest - assert sig.minhash.ksize == 10 # minhash stores k*3, but does the conversion back for us - assert sig.minhash.moltype == 'protein' + assert ( + sig.minhash.ksize == 10 + ) # minhash stores k*3, but does the conversion back for us + assert sig.minhash.moltype == "protein" assert sig.minhash.scaled == 1 def test_manysketch_singleton(runtmp): - fa_csv = runtmp.output('db-fa.txt') + fa_csv = runtmp.output("db-fa.txt") - fa1 = get_test_data('short.fa') - fa2 = get_test_data('short2.fa') - fa3 = get_test_data('short3.fa') + fa1 = get_test_data("short.fa") + fa2 = get_test_data("short2.fa") + fa3 = get_test_data("short3.fa") make_assembly_csv(fa_csv, [fa1, fa2, fa3]) - output = runtmp.output('db.zip') + output = runtmp.output("db.zip") - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "dna,k=31,scaled=1", "--singleton") + runtmp.sourmash( + "scripts", + "manysketch", + fa_csv, + "-o", + output, + "--param-str", + "dna,k=31,scaled=1", + "--singleton", + ) assert os.path.exists(output) - assert not runtmp.last_result.out # stdout should be empty + assert not runtmp.last_result.out # stdout should be empty idx = sourmash.load_file_as_index(output) sigs = list(idx.signatures()) print(sigs) assert len(sigs) == 4 - singleton_sketch = runtmp.output('short3.sig') - runtmp.sourmash('sketch', 'dna', fa3, '-o', singleton_sketch, - '--param-str', "dna,k=31,scaled=1", "--singleton") + singleton_sketch = runtmp.output("short3.sig") + runtmp.sourmash( + "sketch", + "dna", + fa3, + "-o", + singleton_sketch, + "--param-str", + "dna,k=31,scaled=1", + "--singleton", + ) ss_sketch = sourmash.load_signatures(singleton_sketch) ss_sketch1 = next(ss_sketch) ss_sketch2 = next(ss_sketch) - expected_signames = ['shortName', 'tr1 4', 'firstname', 'other'] + expected_signames = ["shortName", "tr1 4", "firstname", "other"] for sig in sigs: assert sig.name in expected_signames - if sig.name == 'firstname': + if sig.name == "firstname": assert sig == ss_sketch1 - if sig.name == 'other': + if sig.name == "other": assert sig == ss_sketch2 def test_manysketch_reads(runtmp, capfd): - fa_csv = runtmp.output('db-fa.csv') + fa_csv = runtmp.output("db-fa.csv") - fa1 = get_test_data('short.fa') - fa2 = get_test_data('short2.fa') - fa3 = get_test_data('short3.fa') + fa1 = get_test_data("short.fa") + fa2 = get_test_data("short2.fa") + fa3 = get_test_data("short3.fa") - make_reads_csv(fa_csv, [("short", fa1, fa2), ('short3', fa3, '')]) # make sure we can just do read1 alone + make_reads_csv( + fa_csv, [("short", fa1, fa2), ("short3", fa3, "")] + ) # make sure we can just do read1 alone - output = runtmp.output('db.zip') + output = runtmp.output("db.zip") - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "dna,k=31,scaled=1") + runtmp.sourmash( + "scripts", + "manysketch", + fa_csv, + "-o", + output, + "--param-str", + "dna,k=31,scaled=1", + ) assert os.path.exists(output) - assert not runtmp.last_result.out # stdout should be empty + assert not runtmp.last_result.out # stdout should be empty captured = capfd.readouterr() print(captured.out) print(captured.err) @@ -633,40 +752,72 @@ def test_manysketch_reads(runtmp, capfd): print(sigs) assert len(sigs) == 2 - s1 = runtmp.output('short.sig') - runtmp.sourmash('sketch', 'dna', fa1, fa2, '-o', s1, - '--param-str', "k=31,scaled=1", '--name', 'short') + s1 = runtmp.output("short.sig") + runtmp.sourmash( + "sketch", + "dna", + fa1, + fa2, + "-o", + s1, + "--param-str", + "k=31,scaled=1", + "--name", + "short", + ) sig1 = 
sourmash.load_one_signature(s1) - s3 = runtmp.output('short3.sig') - runtmp.sourmash('sketch', 'dna', fa3, '-o', s3, - '--param-str', "k=31,scaled=1", '--name', 'short3') + s3 = runtmp.output("short3.sig") + runtmp.sourmash( + "sketch", + "dna", + fa3, + "-o", + s3, + "--param-str", + "k=31,scaled=1", + "--name", + "short3", + ) sig2 = sourmash.load_one_signature(s3) - expected_signames = ['short', 'short3'] + expected_signames = ["short", "short3"] for sig in sigs: assert sig.name in expected_signames - if sig.name == 'short': + if sig.name == "short": assert sig == sig1 - if sig.name == 'short3': + if sig.name == "short3": assert sig == sig2 def test_manysketch_reads_singleton(runtmp, capfd): - fa_csv = runtmp.output('db-fa.csv') - - fa1 = get_test_data('short.fa') - fa2 = get_test_data('short2.fa') - fa3 = get_test_data('short3.fa') - - make_reads_csv(fa_csv, [("short", fa2, fa3), ]) - - output = runtmp.output('db.zip') - - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "dna,k=31,scaled=1", '--singleton') + fa_csv = runtmp.output("db-fa.csv") + + fa1 = get_test_data("short.fa") + fa2 = get_test_data("short2.fa") + fa3 = get_test_data("short3.fa") + + make_reads_csv( + fa_csv, + [ + ("short", fa2, fa3), + ], + ) + + output = runtmp.output("db.zip") + + runtmp.sourmash( + "scripts", + "manysketch", + fa_csv, + "-o", + output, + "--param-str", + "dna,k=31,scaled=1", + "--singleton", + ) assert os.path.exists(output) - assert not runtmp.last_result.out # stdout should be empty + assert not runtmp.last_result.out # stdout should be empty captured = capfd.readouterr() print(captured.out) print(captured.err) @@ -679,52 +830,77 @@ def test_manysketch_reads_singleton(runtmp, capfd): print(sigs) assert len(sigs) == 3 - s1 = runtmp.output('singleton.sig') - runtmp.sourmash('sketch', 'dna', fa2, fa3, '-o', s1, - '--param-str', "k=31,scaled=1", '--singleton') + s1 = runtmp.output("singleton.sig") + runtmp.sourmash( + "sketch", + "dna", + fa2, + fa3, + "-o", + s1, + "--param-str", + "k=31,scaled=1", + "--singleton", + ) ss = sourmash.load_signatures(s1) ss_sketch1 = next(ss) ss_sketch2 = next(ss) ss_sketch3 = next(ss) - expected_signames = ['tr1 4', 'firstname', 'other'] + expected_signames = ["tr1 4", "firstname", "other"] for sig in sigs: assert sig.name in expected_signames - if sig.name == 'tr1 4': + if sig.name == "tr1 4": assert sig == ss_sketch1 - elif sig.name == 'firstname': + elif sig.name == "firstname": assert sig == ss_sketch2 - elif sig.name == 'other': + elif sig.name == "other": assert sig == ss_sketch3 def test_manysketch_prefix(runtmp, capfd): - fa_csv = runtmp.output('db-fa.csv') + fa_csv = runtmp.output("db-fa.csv") - fa1 = get_test_data('short.fa') + fa1 = get_test_data("short.fa") fa_path = os.path.dirname(fa1) - dna_prefix = os.path.join(fa_path, "short*fa") # need to avoid matching short-protein.fa + dna_prefix = os.path.join( + fa_path, "short*fa" + ) # need to avoid matching short-protein.fa prot_prefix = os.path.join(fa_path, "*protein.fa") # make prefix input file - with open(fa_csv, 'wt') as fp: + with open(fa_csv, "wt") as fp: fp.write("name,input_moltype,prefix,exclude\n") - fp.write(f"short,DNA,{dna_prefix},{prot_prefix}\n") # short.fa, short2.fa, short3.fa, short-protein.fa - fp.write(f"short_protein,protein,{prot_prefix},\n") # short-protein.fa only - - output = runtmp.output('prefix.zip') - - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "dna,k=31,scaled=1", '-p', "protein,k=10,scaled=1") + fp.write( 
+            f"short,DNA,{dna_prefix},{prot_prefix}\n"
+        )  # short.fa, short2.fa, short3.fa, short-protein.fa
+        fp.write(f"short_protein,protein,{prot_prefix},\n")  # short-protein.fa only
+
+    output = runtmp.output("prefix.zip")
+
+    runtmp.sourmash(
+        "scripts",
+        "manysketch",
+        fa_csv,
+        "-o",
+        output,
+        "--param-str",
+        "dna,k=31,scaled=1",
+        "-p",
+        "protein,k=10,scaled=1",
+    )
 
     assert os.path.exists(output)
-    assert not runtmp.last_result.out # stdout should be empty
+    assert not runtmp.last_result.out  # stdout should be empty
     captured = capfd.readouterr()
     print(captured.out)
     print(captured.err)
 
-    assert "Found 'prefix' CSV. Using 'glob' to find files based on 'prefix' column." in captured.out
+    assert (
+        "Found 'prefix' CSV. Using 'glob' to find files based on 'prefix' column."
+        in captured.out
+    )
     assert "DONE. Processed 4 fasta files" in captured.err
 
     idx = sourmash.load_file_as_index(output)
@@ -734,55 +910,93 @@ def test_manysketch_prefix(runtmp, capfd):
     assert len(sigs) == 2
 
     # make same sigs with sourmash
-    fa2 = get_test_data('short2.fa')
-    fa3 = get_test_data('short3.fa')
-    fa4 = get_test_data('short-protein.fa')
-    s1 = runtmp.output('short.sig')
-    runtmp.sourmash('sketch', 'dna', fa1, fa2, fa3, '-o', s1,
-                    '--param-str', "dna,k=31,scaled=1", '--name', 'short')
+    fa2 = get_test_data("short2.fa")
+    fa3 = get_test_data("short3.fa")
+    fa4 = get_test_data("short-protein.fa")
+    s1 = runtmp.output("short.sig")
+    runtmp.sourmash(
+        "sketch",
+        "dna",
+        fa1,
+        fa2,
+        fa3,
+        "-o",
+        s1,
+        "--param-str",
+        "dna,k=31,scaled=1",
+        "--name",
+        "short",
+    )
     sig1 = sourmash.load_one_signature(s1)
 
-    s2 = runtmp.output('short-protein.sig')
-    runtmp.sourmash('sketch', 'protein', fa4, '-o', s2,
-                    '--param-str', "protein,k=10,scaled=1", '--name', 'short_protein')
+    s2 = runtmp.output("short-protein.sig")
+    runtmp.sourmash(
+        "sketch",
+        "protein",
+        fa4,
+        "-o",
+        s2,
+        "--param-str",
+        "protein,k=10,scaled=1",
+        "--name",
+        "short_protein",
+    )
    sig2 = sourmash.load_one_signature(s2)
 
-    expected_signames = ['short', 'short_protein']
+    expected_signames = ["short", "short_protein"]
 
     for sig in sigs:
         assert sig.name in expected_signames
-        if sig.name == 'short':
-            assert sig,minhash.hashes == sig1.minhash.hashes
-        if sig.name == 'short_protein':
+        if sig.name == "short":
+            assert sig.minhash.hashes == sig1.minhash.hashes  # fix 'sig,minhash' typo
+        if sig.name == "short_protein":
             assert sig == sig2
 
 
 def test_manysketch_prefix2(runtmp, capfd):
-    fa_csv = runtmp.output('db-fa.csv')
+    fa_csv = runtmp.output("db-fa.csv")
 
-    fa1 = get_test_data('short.fa')
+    fa1 = get_test_data("short.fa")
     fa_path = os.path.dirname(fa1)
 
     # test without '*'
-    dna_prefix = os.path.join(fa_path, "short") # need to avoid matching short-protein.fa
+    dna_prefix = os.path.join(
+        fa_path, "short"
+    )  # need to avoid matching short-protein.fa
     prot_prefix = os.path.join(fa_path, "*protein")
     zip_exclude = os.path.join(fa_path, "*zip")
 
     # make prefix input file
-    with open(fa_csv, 'wt') as fp:
+    with open(fa_csv, "wt") as fp:
         fp.write("name,input_moltype,prefix,exclude\n")
-        fp.write(f"short,DNA,{dna_prefix},{prot_prefix}\n") # short.fa, short2.fa, short3.fa, short-protein.fa
-        fp.write(f"short_protein,protein,{prot_prefix},{zip_exclude}\n") # short-protein.fa only
-
-    output = runtmp.output('prefix.zip')
-
-    runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output,
-                    '--param-str', "dna,k=31,scaled=1", '-p', "protein,k=10,scaled=1")
+        fp.write(
+            f"short,DNA,{dna_prefix},{prot_prefix}\n"
+        )  # short.fa, short2.fa, short3.fa, short-protein.fa
+        fp.write(
+            f"short_protein,protein,{prot_prefix},{zip_exclude}\n"
+        )  # short-protein.fa only
+
+    output = runtmp.output("prefix.zip")
+
+    runtmp.sourmash(
+        "scripts",
+        "manysketch",
+        fa_csv,
+        "-o",
+        output,
+        "--param-str",
+        "dna,k=31,scaled=1",
+        "-p",
+        "protein,k=10,scaled=1",
+    )
 
     assert os.path.exists(output)
-    assert not runtmp.last_result.out # stdout should be empty
+    assert not runtmp.last_result.out  # stdout should be empty
     captured = capfd.readouterr()
     print(captured.out)
     print(captured.err)
 
-    assert "Found 'prefix' CSV. Using 'glob' to find files based on 'prefix' column." in captured.out
+    assert (
+        "Found 'prefix' CSV. Using 'glob' to find files based on 'prefix' column."
+        in captured.out
+    )
     assert "DONE. Processed 4 fasta files" in captured.err
 
     idx = sourmash.load_file_as_index(output)
@@ -792,98 +1006,164 @@ def test_manysketch_prefix2(runtmp, capfd):
     assert len(sigs) == 2
 
     # make same sigs with sourmash
-    fa2 = get_test_data('short2.fa')
-    fa3 = get_test_data('short3.fa')
-    fa4 = get_test_data('short-protein.fa')
-    s1 = runtmp.output('short.sig')
-    runtmp.sourmash('sketch', 'dna', fa1, fa2, fa3, '-o', s1,
-                    '--param-str', "dna,k=31,scaled=1", '--name', 'short')
+    fa2 = get_test_data("short2.fa")
+    fa3 = get_test_data("short3.fa")
+    fa4 = get_test_data("short-protein.fa")
+    s1 = runtmp.output("short.sig")
+    runtmp.sourmash(
+        "sketch",
+        "dna",
+        fa1,
+        fa2,
+        fa3,
+        "-o",
+        s1,
+        "--param-str",
+        "dna,k=31,scaled=1",
+        "--name",
+        "short",
+    )
     sig1 = sourmash.load_one_signature(s1)
 
-    s2 = runtmp.output('short-protein.sig')
-    runtmp.sourmash('sketch', 'protein', fa4, '-o', s2,
-                    '--param-str', "protein,k=10,scaled=1", '--name', 'short_protein')
+    s2 = runtmp.output("short-protein.sig")
+    runtmp.sourmash(
+        "sketch",
+        "protein",
+        fa4,
+        "-o",
+        s2,
+        "--param-str",
+        "protein,k=10,scaled=1",
+        "--name",
+        "short_protein",
+    )
     sig2 = sourmash.load_one_signature(s2)
 
-    expected_signames = ['short', 'short_protein']
+    expected_signames = ["short", "short_protein"]
 
     for sig in sigs:
         assert sig.name in expected_signames
-        if sig.name == 'short':
-            assert sig,minhash.hashes == sig1.minhash.hashes
-        if sig.name == 'short_protein':
+        if sig.name == "short":
+            assert sig.minhash.hashes == sig1.minhash.hashes  # fix 'sig,minhash' typo
+        if sig.name == "short_protein":
             assert sig == sig2
 
 
 def test_manysketch_prefix_duplicated_fail(runtmp, capfd):
-    fa_csv = runtmp.output('db-fa.csv')
+    fa_csv = runtmp.output("db-fa.csv")
 
-    fa1 = get_test_data('short.fa')
+    fa1 = get_test_data("short.fa")
     fa_path = os.path.dirname(fa1)
 
     # test without '*'
-    dna_prefix = os.path.join(fa_path, "short") # need to avoid matching short-protein.fa
+    dna_prefix = os.path.join(
+        fa_path, "short"
+    )  # need to avoid matching short-protein.fa
     prot_prefix = os.path.join(fa_path, "*protein")
     zip_exclude = os.path.join(fa_path, "*zip")
 
     # make prefix input file
-    with open(fa_csv, 'wt') as fp:
+    with open(fa_csv, "wt") as fp:
         fp.write("name,input_moltype,prefix,exclude\n")
-        fp.write(f"short,DNA,{dna_prefix},{prot_prefix}\n") # short.fa, short2.fa, short3.fa, short-protein.fa
-        fp.write(f"short,DNA,{dna_prefix},{prot_prefix}\n") # duplicate of row one -- this should just be skipped
-        fp.write(f"short_protein,protein,{prot_prefix},{zip_exclude}\n") # short-protein.fa only
+        fp.write(
+            f"short,DNA,{dna_prefix},{prot_prefix}\n"
+        )  # short.fa, short2.fa, short3.fa, short-protein.fa
+        fp.write(
+            f"short,DNA,{dna_prefix},{prot_prefix}\n"
+        )  # duplicate of row one -- this should just be skipped
+        fp.write(
+            
f"short_protein,protein,{prot_prefix},{zip_exclude}\n" + ) # short-protein.fa only # ALSO short-protein.fa, but different name. should raise err without force fp.write(f"second_protein,protein,{prot_prefix},{zip_exclude}\n") - output = runtmp.output('prefix.zip') + output = runtmp.output("prefix.zip") with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "dna,k=31,scaled=1", '-p', "protein,k=10,scaled=1") + runtmp.sourmash( + "scripts", + "manysketch", + fa_csv, + "-o", + output, + "--param-str", + "dna,k=31,scaled=1", + "-p", + "protein,k=10,scaled=1", + ) assert not os.path.exists(output) - assert not runtmp.last_result.out # stdout should be empty + assert not runtmp.last_result.out # stdout should be empty captured = capfd.readouterr() print(captured.out) print(captured.err) - assert "Found 'prefix' CSV. Using 'glob' to find files based on 'prefix' column." in captured.out + assert ( + "Found 'prefix' CSV. Using 'glob' to find files based on 'prefix' column." + in captured.out + ) assert "Found identical FASTA paths in more than one row!" in captured.err assert "Duplicated paths:" in captured.err assert "short-protein.fa" in captured.err - assert "Duplicated FASTA files found. Please use --force to bypass this check" in captured.err + assert ( + "Duplicated FASTA files found. Please use --force to bypass this check" + in captured.err + ) def test_manysketch_prefix_duplicated_force(runtmp, capfd): - fa_csv = runtmp.output('db-fa.csv') + fa_csv = runtmp.output("db-fa.csv") - fa1 = get_test_data('short.fa') + fa1 = get_test_data("short.fa") fa_path = os.path.dirname(fa1) # test without '*' - dna_prefix = os.path.join(fa_path, "short") # need to avoid matching short-protein.fa + dna_prefix = os.path.join( + fa_path, "short" + ) # need to avoid matching short-protein.fa prot_prefix = os.path.join(fa_path, "*protein") zip_exclude = os.path.join(fa_path, "*zip") # make prefix input file - with open(fa_csv, 'wt') as fp: + with open(fa_csv, "wt") as fp: fp.write("name,input_moltype,prefix,exclude\n") - fp.write(f"short,DNA,{dna_prefix},{prot_prefix}\n") # short.fa, short2.fa, short3.fa, short-protein.fa - fp.write(f"short,DNA,{dna_prefix},{prot_prefix}\n") # duplicate of row one -- this should just be skipped - fp.write(f"short_protein,protein,{prot_prefix},{zip_exclude}\n") # short-protein.fa only + fp.write( + f"short,DNA,{dna_prefix},{prot_prefix}\n" + ) # short.fa, short2.fa, short3.fa, short-protein.fa + fp.write( + f"short,DNA,{dna_prefix},{prot_prefix}\n" + ) # duplicate of row one -- this should just be skipped + fp.write( + f"short_protein,protein,{prot_prefix},{zip_exclude}\n" + ) # short-protein.fa only # ALSO short-protein.fa, but different name. should raise err without force fp.write(f"second_protein,protein,{prot_prefix},{zip_exclude}\n") - output = runtmp.output('prefix.zip') - - runtmp.sourmash('scripts', 'manysketch', fa_csv, '-o', output, - '--param-str', "dna,k=31,scaled=1", '-p', "protein,k=10,scaled=1", - '--force') + output = runtmp.output("prefix.zip") + + runtmp.sourmash( + "scripts", + "manysketch", + fa_csv, + "-o", + output, + "--param-str", + "dna,k=31,scaled=1", + "-p", + "protein,k=10,scaled=1", + "--force", + ) assert os.path.exists(output) - assert not runtmp.last_result.out # stdout should be empty + assert not runtmp.last_result.out # stdout should be empty captured = capfd.readouterr() print(captured.out) print(captured.err) - assert "Found 'prefix' CSV. 
Using 'glob' to find files based on 'prefix' column." in captured.out - assert "Loaded 3 rows in total (3 DNA FASTA and 2 protein FASTA), 1 duplicate rows skipped." in captured.out + assert ( + "Found 'prefix' CSV. Using 'glob' to find files based on 'prefix' column." + in captured.out + ) + assert ( + "Loaded 3 rows in total (3 DNA FASTA and 2 protein FASTA), 1 duplicate rows skipped." + in captured.out + ) assert "Found identical FASTA paths in more than one row!" in captured.err assert "Duplicated paths:" in captured.err assert "short-protein.fa" in captured.err @@ -895,19 +1175,20 @@ def test_manysketch_prefix_duplicated_force(runtmp, capfd): assert len(sigs) == 3 + def test_singlesketch_simple(runtmp): """Test basic single sketching with default parameters.""" - fa1 = get_test_data('short.fa') - output = runtmp.output('short.sig') + fa1 = get_test_data("short.fa") + output = runtmp.output("short.sig") # Run the singlesketch command - runtmp.sourmash('scripts', 'singlesketch', fa1, '-o', output) + runtmp.sourmash("scripts", "singlesketch", fa1, "-o", output) # Check if the output exists and contains the expected data assert os.path.exists(output) sig = sourmash.load_one_signature(output) - - assert sig.name == 'short.fa' + + assert sig.name == "short.fa" assert sig.minhash.ksize == 31 assert sig.minhash.is_dna assert sig.minhash.scaled == 1000 @@ -915,17 +1196,17 @@ def test_singlesketch_simple(runtmp): def test_singlesketch_with_name(runtmp): """Test single sketching with a custom name.""" - fa1 = get_test_data('short.fa') - output = runtmp.output('short_named.sig') + fa1 = get_test_data("short.fa") + output = runtmp.output("short_named.sig") # Run the singlesketch command with the --name option - runtmp.sourmash('scripts', 'singlesketch', fa1, '-o', output, '-n', 'custom_name') + runtmp.sourmash("scripts", "singlesketch", fa1, "-o", output, "-n", "custom_name") # Check if the output exists and contains the expected data assert os.path.exists(output) sig = sourmash.load_one_signature(output) - assert sig.name == 'custom_name' + assert sig.name == "custom_name" assert sig.minhash.ksize == 31 assert sig.minhash.is_dna assert sig.minhash.scaled == 1000 @@ -933,11 +1214,21 @@ def test_singlesketch_with_name(runtmp): def test_singlesketch_mult_k(runtmp): """Test single sketching with multiple k-mer sizes.""" - fa1 = get_test_data('short.fa') - output = runtmp.output('short_mult_k.sig') + fa1 = get_test_data("short.fa") + output = runtmp.output("short_mult_k.sig") # Run the singlesketch command with multiple k sizes - runtmp.sourmash('scripts', 'singlesketch', fa1, '-o', output, '-p', 'k=21,scaled=100', '-p', 'k=31,scaled=100') + runtmp.sourmash( + "scripts", + "singlesketch", + fa1, + "-o", + output, + "-p", + "k=21,scaled=100", + "-p", + "k=31,scaled=100", + ) # Check if the output exists and contains the expected data assert os.path.exists(output) @@ -951,11 +1242,13 @@ def test_singlesketch_mult_k(runtmp): def test_singlesketch_mult_moltype(runtmp): """Test single sketching with different molecule types.""" - fa1 = get_test_data('short-protein.fa') - output = runtmp.output('short_mult_moltype.sig') + fa1 = get_test_data("short-protein.fa") + output = runtmp.output("short_mult_moltype.sig") # Run the singlesketch command with multiple molecule types - runtmp.sourmash('scripts', 'singlesketch', fa1, '-o', output, '-p', 'protein,k=10,scaled=100') + runtmp.sourmash( + "scripts", "singlesketch", fa1, "-o", output, "-p", "protein,k=10,scaled=100" + ) # Check if the output exists and contains 
the expected data assert os.path.exists(output) @@ -969,12 +1262,14 @@ def test_singlesketch_mult_moltype(runtmp): def test_singlesketch_invalid_params(runtmp, capfd): """Test singlesketch command with invalid parameters.""" - fa1 = get_test_data('short.fa') - output = runtmp.output('short_invalid.sig') + fa1 = get_test_data("short.fa") + output = runtmp.output("short_invalid.sig") # Run the singlesketch command with an invalid parameter string with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'singlesketch', fa1, '-o', output, '-p', 'invalid_param') + runtmp.sourmash( + "scripts", "singlesketch", fa1, "-o", output, "-p", "invalid_param" + ) # Check that the error message is correct captured = capfd.readouterr()