From 74c25c380cbe919a731cf4aa186b444964b07332 Mon Sep 17 00:00:00 2001
From: Luiz Irber <contact+github@luizirber.org>
Date: Tue, 23 Jul 2024 17:50:31 -0700
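Subject: [PATCH] leave Python changes for another PR

Drop the pytest-based Python benchmarking from this change so it can be
handled in a separate PR:

* remove the benchmarks-python job from .github/workflows/codspeed.yml
* delete tests/test_benchmarks.py and add the TimeMinHashSuite,
  TimeMinAbundanceSuite and TimeZipStorageSuite classes to
  benchmarks/benchmarks.py
* remove pytest-benchmark from the test dependencies in pyproject.toml
* remove the benchmarks and codspeed environments from tox.ini, along
  with the remaining references to codspeed

Also included: the cargo build step in .github/workflows/rust.yml now
passes --all-features instead of --all-features --tests, and Cargo.lock
moves colored from 2.1.0 to 2.0.0, which adds atty 0.2.14 and
hermit-abi 0.1.19.
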
---
 .github/workflows/codspeed.yml |  19 ----
 .github/workflows/rust.yml     |   2 +-
 Cargo.lock                     |  31 +++++-
 benchmarks/benchmarks.py       | 124 +++++++++++++++++++++++
 pyproject.toml                 |   1 -
 tests/test_benchmarks.py       | 176 ---------------------------------
 tox.ini                        |  32 +-----
 7 files changed, 153 insertions(+), 232 deletions(-)
 delete mode 100644 tests/test_benchmarks.py

diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml
index 6c5e4b1dda..9c879f2f36 100644
--- a/.github/workflows/codspeed.yml
+++ b/.github/workflows/codspeed.yml
@@ -32,22 +32,3 @@ jobs:
         with:
           run: "cd src/core && cargo codspeed run"
           token: ${{ secrets.CODSPEED_TOKEN }}
-
-  benchmarks-python:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v3
-        with:
-          python-version: "3.12"
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install tox
-
-      - name: Run benchmarks
-        uses: CodSpeedHQ/action@v2
-        with:
-          token: ${{ secrets.CODSPEED_TOKEN }}
-          run: tox -e codspeed
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index ddce31eb49..1a1b27b8fd 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -260,7 +260,7 @@ jobs:
         uses: actions-rs/cargo@v1
         with:
           command: build
-          args: --all-features --tests
+          args: --all-features
 
   check_cbindgen:
     name: "Check if cbindgen runs cleanly for generating the C headers"
diff --git a/Cargo.lock b/Cargo.lock
index b818517faa..f7b01f5e7d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -61,6 +61,17 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi 0.1.19",
+ "libc",
+ "winapi",
+]
+
 [[package]]
 name = "autocfg"
 version = "1.1.0"
@@ -382,12 +393,13 @@ dependencies = [
 
 [[package]]
 name = "colored"
-version = "2.1.0"
+version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8"
+checksum = "b3616f750b84d8f0de8a58bda93e08e2a81ad3f523089b05f1dffecab48c6cbd"
 dependencies = [
+ "atty",
  "lazy_static",
- "windows-sys 0.48.0",
+ "winapi",
 ]
 
 [[package]]
@@ -686,6 +698,15 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
 
+[[package]]
+name = "hermit-abi"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "hermit-abi"
 version = "0.3.2"
@@ -740,7 +761,7 @@ version = "1.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2"
 dependencies = [
- "hermit-abi",
+ "hermit-abi 0.3.2",
  "libc",
  "windows-sys 0.48.0",
 ]
@@ -751,7 +772,7 @@ version = "0.4.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f"
 dependencies = [
- "hermit-abi",
+ "hermit-abi 0.3.2",
  "io-lifetimes",
  "rustix 0.37.25",
  "windows-sys 0.48.0",
diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py
index 93a8e1ab23..d517bf7b2f 100644
--- a/benchmarks/benchmarks.py
+++ b/benchmarks/benchmarks.py
@@ -33,6 +33,74 @@ def load_sequences():
     return sequences
 
 
+class TimeMinHashSuite:
+    def setup(self):
+        self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
+        self.protein_mh = MinHash(
+            MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False
+        )
+        self.sequences = load_sequences()
+
+        self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
+        for seq in self.sequences:
+            self.populated_mh.add_sequence(seq)
+
+    def time_add_sequence(self):
+        mh = self.mh
+        sequences = self.sequences
+        for seq in sequences:
+            mh.add_sequence(seq)
+
+    def time_add_protein(self):
+        mh = self.protein_mh
+        sequences = self.sequences
+        for seq in sequences:
+            mh.add_protein(seq)
+
+    def time_get_mins(self):
+        mh = self.populated_mh
+        for i in range(GET_MINS_RANGE):
+            mh.get_mins()
+
+    def time_add_hash(self):
+        mh = self.mh
+        for i in range(ADD_HASH_RANGE):
+            mh.add_hash(i)
+
+    def time_add_many(self):
+        mh = self.mh
+        mh.add_many(list(range(ADD_MANY_RANGE)))
+
+    def time_similarity(self):
+        mh = self.mh
+        other_mh = self.populated_mh
+        for i in range(SIMILARITY_TIMES):
+            mh.similarity(other_mh)
+
+    def time_count_common(self):
+        mh = self.mh
+        other_mh = self.populated_mh
+        for i in range(COUNT_COMMON_TIMES):
+            mh.count_common(other_mh)
+
+    def time_merge(self):
+        mh = self.mh
+        other_mh = self.populated_mh
+        for i in range(MERGE_TIMES):
+            mh.merge(other_mh)
+
+    def time_copy(self):
+        mh = self.populated_mh
+        for i in range(COPY_TIMES):
+            mh.__copy__()
+
+    def time_concat(self):
+        mh = self.mh
+        other_mh = self.populated_mh
+        for i in range(CONCAT_TIMES):
+            mh += other_mh
+
+
 class PeakmemMinHashSuite:
     def setup(self):
         self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
@@ -66,6 +134,33 @@ def peakmem_add_many(self):
 ####################
 
 
+class TimeMinAbundanceSuite(TimeMinHashSuite):
+    def setup(self):
+        TimeMinHashSuite.setup(self)
+        self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
+
+        self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
+        for seq in self.sequences:
+            self.populated_mh.add_sequence(seq)
+
+    def time_get_mins_abundance(self):
+        mh = self.populated_mh
+        for i in range(GET_MINS_RANGE):
+            mh.get_mins(with_abundance=True)
+
+    def time_set_abundances(self):
+        mh = self.mh
+        mins = self.populated_mh.get_mins(with_abundance=True)
+        for i in range(SET_ABUNDANCES_RANGE):
+            mh.set_abundances(mins)
+
+    def time_set_abundances_noclear(self):
+        mh = self.mh
+        mins = self.populated_mh.get_mins(with_abundance=True)
+        for i in range(SET_ABUNDANCES_RANGE):
+            mh.set_abundances(mins, clear=False)
+
+
 class PeakmemMinAbundanceSuite(PeakmemMinHashSuite):
     def setup(self):
         PeakmemMinHashSuite.setup(self)
@@ -75,6 +170,35 @@ def setup(self):
 ####################
 
 
+class TimeZipStorageSuite:
+    def setup(self):
+        import zipfile
+
+        self.zipfile = NamedTemporaryFile()
+
+        with zipfile.ZipFile(
+            self.zipfile, mode="w", compression=zipfile.ZIP_STORED
+        ) as storage:
+            for i in range(ZIP_STORAGE_WRITE):
+                # just so we have lots of entries
+                storage.writestr(str(i), b"0")
+            # one big-ish entry
+            storage.writestr("sig1", b"9" * 1_000_000)
+
+    def time_load_from_zipstorage(self):
+        with ZipStorage(self.zipfile.name) as storage:
+            for i in range(ZIP_STORAGE_LOAD):
+                storage.load("sig1")
+
+    def time_load_small_from_zipstorage(self):
+        with ZipStorage(self.zipfile.name) as storage:
+            for i in range(ZIP_STORAGE_LOAD):
+                storage.load("99999")
+
+    def teardown(self):
+        self.zipfile.close()
+
+
 class PeakmemZipStorageSuite:
     def setup(self):
         import zipfile
diff --git a/pyproject.toml b/pyproject.toml
index f18babba34..bf5df8eb18 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -103,7 +103,6 @@ test = [
   "pytest>=6.2.4,<8.4.0",
   "pytest-cov>=4,<6.0",
   "pytest-xdist>=3.1",
-  "pytest-benchmark>=4.0",
   "pyyaml>=6,<7",
   "diff-cover>=7.3",
   "covdefaults>=2.2.2",
diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py
deleted file mode 100644
index d453bb5774..0000000000
--- a/tests/test_benchmarks.py
+++ /dev/null
@@ -1,176 +0,0 @@
-import random
-from tempfile import NamedTemporaryFile
-
-import pytest
-
-from sourmash.sbt_storage import ZipStorage
-from sourmash.minhash import MinHash
-
-RANDOM_SEQ_SIZE = 3000
-RANDOM_SEQ_NUMBER = 300
-
-MINHASH_NUM = 500
-MINHASH_K = 21
-
-GET_MINS_RANGE = 500
-ADD_HASH_RANGE = 10_000
-ADD_MANY_RANGE = 1000
-SIMILARITY_TIMES = 500
-COUNT_COMMON_TIMES = 500
-MERGE_TIMES = 500
-COPY_TIMES = 500
-CONCAT_TIMES = 500
-SET_ABUNDANCES_RANGE = 500
-ZIP_STORAGE_WRITE = 100_000
-ZIP_STORAGE_LOAD = 20
-
-
-def load_sequences():
-    sequences = []
-    for _ in range(10):
-        random_seq = random.sample(
-            "A,C,G,T".split(",") * RANDOM_SEQ_SIZE, RANDOM_SEQ_NUMBER
-        )
-        sequences.append("".join(random_seq))
-    return sequences
-
-
-@pytest.fixture
-def mh():
-    return MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
-
-
-@pytest.fixture
-def mh_protein():
-    return MinHash(MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False)
-
-
-@pytest.fixture
-def sequences():
-    return load_sequences()
-
-
-@pytest.fixture
-def populated_mh(sequences):
-    populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
-    for seq in sequences:
-        populated_mh.add_sequence(seq)
-    return populated_mh
-
-
-def test_add_sequence(benchmark, mh, sequences):
-    @benchmark
-    def bench():
-        for seq in sequences:
-            mh.add_sequence(seq)
-
-
-def test_add_protein(benchmark, mh_protein, sequences):
-    @benchmark
-    def bench():
-        for seq in sequences:
-            mh_protein.add_protein(seq)
-
-
-def test_get_mins(benchmark, populated_mh):
-    benchmark(populated_mh.get_mins)
-
-
-def test_add_hash(benchmark, mh):
-    @benchmark
-    def bench():
-        for i in range(ADD_HASH_RANGE):
-            mh.add_hash(i)
-
-
-def test_add_many(benchmark, mh):
-    benchmark(mh.add_many, list(range(ADD_MANY_RANGE)))
-
-
-def test_similarity(benchmark, mh, populated_mh):
-    benchmark(mh.similarity, populated_mh)
-
-
-def test_count_common(benchmark, mh, populated_mh):
-    benchmark(mh.count_common, populated_mh)
-
-
-def test_merge(benchmark, mh, populated_mh):
-    benchmark(mh.merge, populated_mh)
-
-
-def test_copy(benchmark, populated_mh):
-    benchmark(populated_mh.__copy__)
-
-
-def test_concat(benchmark, mh, populated_mh):
-    benchmark(mh.__iadd__, populated_mh)
-
-
-####################
-
-
-def setup(self):
-    TimeMinHashSuite.setup(self)
-    self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
-
-    self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
-    for seq in self.sequences:
-        self.populated_mh.add_sequence(seq)
-
-
-def time_get_mins_abundance(self):
-    mh = self.populated_mh
-    for i in range(GET_MINS_RANGE):
-        mh.get_mins(with_abundance=True)
-
-
-def time_set_abundances(self):
-    mh = self.mh
-    mins = self.populated_mh.get_mins(with_abundance=True)
-    for i in range(SET_ABUNDANCES_RANGE):
-        mh.set_abundances(mins)
-
-
-def time_set_abundances_noclear(self):
-    mh = self.mh
-    mins = self.populated_mh.get_mins(with_abundance=True)
-    for i in range(SET_ABUNDANCES_RANGE):
-        mh.set_abundances(mins, clear=False)
-
-
-####################
-
-
-@pytest.fixture
-def zipstore():
-    import zipfile
-
-    zf = NamedTemporaryFile()
-
-    with zipfile.ZipFile(zf, mode="w", compression=zipfile.ZIP_STORED) as storage:
-        for i in range(ZIP_STORAGE_WRITE):
-            # just so we have lots of entries
-            storage.writestr(str(i), b"0")
-        # one big-ish entry
-        storage.writestr("sig1", b"9" * 1_000_000)
-
-    yield zf
-
-    zf.close()
-
-
-def test_load_from_zipstorage(benchmark, zipstore):
-    @benchmark
-    def bench():
-        with ZipStorage(zipstore.name) as storage:
-            for _ in range(ZIP_STORAGE_LOAD):
-                storage.load("sig1")
-
-
-def test_load_small_from_zipstorage(benchmark, zipstore):
-    @benchmark
-    def bench():
-        with ZipStorage(zipstore.name) as storage:
-            for _ in range(ZIP_STORAGE_LOAD):
-                storage.load("99999")
diff --git a/tox.ini b/tox.ini
index c1237267da..ecf66a2bcd 100644
--- a/tox.ini
+++ b/tox.ini
@@ -111,34 +111,6 @@ commands =
     asv machine --yes
     asv continuous latest HEAD {posargs}
 
-[testenv:benchmarks]
-description = run pytest-benchmark for benchmarking
-changedir = {toxinidir}
-commands =
-    pytest \
-      --cov "{envsitepackagesdir}/sourmash" \
-      --cov-config "{toxinidir}/tox.ini" \
-      --cov-report= \
-      --junitxml {toxworkdir}/junit.benchmarks.xml \
-      --benchmark-only \
-      -n 0 \
-      {posargs:tests}
-
-[testenv:codspeed]
-description = run codspeed for benchmarking
-deps =
-    pytest-codspeed
-changedir = {toxinidir}
-commands =
-    pytest \
-      --cov "{envsitepackagesdir}/sourmash" \
-      --cov-config "{toxinidir}/tox.ini" \
-      --cov-report= \
-      --junitxml {toxworkdir}/junit.codspeed.xml \
-      --codspeed \
-      -k benchmarks \
-      {posargs:tests}
-
 [testenv:docs]
 description = invoke sphinx-build to build the HTML docs
 basepython = python3.10
@@ -208,7 +180,7 @@ commands =
     coverage xml -i -o {toxworkdir}/coverage.xml
     coverage html -i -d {toxworkdir}/htmlcov
     diff-cover --compare-branch {env:DIFF_AGAINST:origin/latest} {toxworkdir}/coverage.xml
-depends = py312, py311, py310, pypy3, codspeed
+depends = py312, py311, py310, pypy3
 pass_env = {[testenv]pass_env}
     DIFF_AGAINST
 set_env = COVERAGE_FILE={toxworkdir}/.coverage
@@ -261,7 +233,7 @@ source = src/sourmash/
 python =
     3.10: py310, docs, package_description, coverage
     3.11: py311, coverage
-    3.12: py312, coverage, codspeed
+    3.12: py312, coverage
 
 [flake8]
 max-complexity = 22