From 74c25c380cbe919a731cf4aa186b444964b07332 Mon Sep 17 00:00:00 2001 From: Luiz Irber Date: Tue, 23 Jul 2024 17:50:31 -0700 Subject: [PATCH] leave Python changes for another PR --- .github/workflows/codspeed.yml | 19 ---- .github/workflows/rust.yml | 2 +- Cargo.lock | 31 +++++- benchmarks/benchmarks.py | 124 +++++++++++++++++++++++ pyproject.toml | 1 - tests/test_benchmarks.py | 176 --------------------------------- tox.ini | 32 +----- 7 files changed, 153 insertions(+), 232 deletions(-) delete mode 100644 tests/test_benchmarks.py diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml index 6c5e4b1dda..9c879f2f36 100644 --- a/.github/workflows/codspeed.yml +++ b/.github/workflows/codspeed.yml @@ -32,22 +32,3 @@ jobs: with: run: "cd src/core && cargo codspeed run" token: ${{ secrets.CODSPEED_TOKEN }} - - benchmarks-python: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 - with: - python-version: "3.12" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install tox - - - name: Run benchmarks - uses: CodSpeedHQ/action@v2 - with: - token: ${{ secrets.CODSPEED_TOKEN }} - run: tox -e codspeed diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index ddce31eb49..1a1b27b8fd 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -260,7 +260,7 @@ jobs: uses: actions-rs/cargo@v1 with: command: build - args: --all-features --tests + args: --all-features check_cbindgen: name: "Check if cbindgen runs cleanly for generating the C headers" diff --git a/Cargo.lock b/Cargo.lock index b818517faa..f7b01f5e7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -61,6 +61,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -382,12 +393,13 @@ dependencies = [ [[package]] name = "colored" -version = "2.1.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8" +checksum = "b3616f750b84d8f0de8a58bda93e08e2a81ad3f523089b05f1dffecab48c6cbd" dependencies = [ + "atty", "lazy_static", - "windows-sys 0.48.0", + "winapi", ] [[package]] @@ -686,6 +698,15 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "hermit-abi" version = "0.3.2" @@ -740,7 +761,7 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.2", "libc", "windows-sys 0.48.0", ] @@ -751,7 +772,7 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.2", "io-lifetimes", "rustix 0.37.25", "windows-sys 0.48.0", diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index 93a8e1ab23..d517bf7b2f 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -33,6 +33,74 @@ def load_sequences(): return sequences +class TimeMinHashSuite: + def setup(self): + self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False) + self.protein_mh = MinHash( + MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False + ) + self.sequences = load_sequences() + + self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False) + for seq in self.sequences: + self.populated_mh.add_sequence(seq) + + def time_add_sequence(self): + mh = self.mh + sequences = self.sequences + for seq in sequences: + mh.add_sequence(seq) + + def time_add_protein(self): + mh = self.protein_mh + sequences = self.sequences + for seq in sequences: + mh.add_protein(seq) + + def time_get_mins(self): + mh = self.populated_mh + for i in range(GET_MINS_RANGE): + mh.get_mins() + + def time_add_hash(self): + mh = self.mh + for i in range(ADD_HASH_RANGE): + mh.add_hash(i) + + def time_add_many(self): + mh = self.mh + mh.add_many(list(range(ADD_MANY_RANGE))) + + def time_similarity(self): + mh = self.mh + other_mh = self.populated_mh + for i in range(SIMILARITY_TIMES): + mh.similarity(other_mh) + + def time_count_common(self): + mh = self.mh + other_mh = self.populated_mh + for i in range(COUNT_COMMON_TIMES): + mh.count_common(other_mh) + + def time_merge(self): + mh = self.mh + other_mh = self.populated_mh + for i in range(MERGE_TIMES): + mh.merge(other_mh) + + def time_copy(self): + mh = self.populated_mh + for i in range(COPY_TIMES): + mh.__copy__() + + def time_concat(self): + mh = self.mh + other_mh = self.populated_mh + for i in range(CONCAT_TIMES): + mh += other_mh + + class PeakmemMinHashSuite: def setup(self): self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True) @@ -66,6 +134,33 @@ def peakmem_add_many(self): #################### +class TimeMinAbundanceSuite(TimeMinHashSuite): + def setup(self): + TimeMinHashSuite.setup(self) + self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True) + + self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True) + for seq in self.sequences: + self.populated_mh.add_sequence(seq) + + def time_get_mins_abundance(self): + mh = self.populated_mh + for i in range(GET_MINS_RANGE): + mh.get_mins(with_abundance=True) + + def time_set_abundances(self): + mh = self.mh + mins = self.populated_mh.get_mins(with_abundance=True) + for i in range(SET_ABUNDANCES_RANGE): + mh.set_abundances(mins) + + def time_set_abundances_noclear(self): + mh = self.mh + mins = self.populated_mh.get_mins(with_abundance=True) + for i in range(SET_ABUNDANCES_RANGE): + mh.set_abundances(mins, clear=False) + + class PeakmemMinAbundanceSuite(PeakmemMinHashSuite): def setup(self): PeakmemMinHashSuite.setup(self) @@ -75,6 +170,35 @@ def setup(self): #################### +class TimeZipStorageSuite: + def setup(self): + import zipfile + + self.zipfile = NamedTemporaryFile() + + with zipfile.ZipFile( + self.zipfile, mode="w", compression=zipfile.ZIP_STORED + ) as storage: + for i in range(ZIP_STORAGE_WRITE): + # just so we have lots of entries + storage.writestr(str(i), b"0") + # one big-ish entry + storage.writestr("sig1", b"9" * 1_000_000) + + def time_load_from_zipstorage(self): + with ZipStorage(self.zipfile.name) as storage: + for i in range(ZIP_STORAGE_LOAD): + storage.load("sig1") + + def time_load_small_from_zipstorage(self): + with ZipStorage(self.zipfile.name) as storage: + for i in range(ZIP_STORAGE_LOAD): + storage.load("99999") + + def teardown(self): + self.zipfile.close() + + class PeakmemZipStorageSuite: def setup(self): import zipfile diff --git a/pyproject.toml b/pyproject.toml index f18babba34..bf5df8eb18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,7 +103,6 @@ test = [ "pytest>=6.2.4,<8.4.0", "pytest-cov>=4,<6.0", "pytest-xdist>=3.1", - "pytest-benchmark>=4.0", "pyyaml>=6,<7", "diff-cover>=7.3", "covdefaults>=2.2.2", diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py deleted file mode 100644 index d453bb5774..0000000000 --- a/tests/test_benchmarks.py +++ /dev/null @@ -1,176 +0,0 @@ -import random -from tempfile import NamedTemporaryFile - -import pytest - -from sourmash.sbt_storage import ZipStorage -from sourmash.minhash import MinHash - -RANDOM_SEQ_SIZE = 3000 -RANDOM_SEQ_NUMBER = 300 - -MINHASH_NUM = 500 -MINHASH_K = 21 - -GET_MINS_RANGE = 500 -ADD_HASH_RANGE = 10_000 -ADD_MANY_RANGE = 1000 -SIMILARITY_TIMES = 500 -COUNT_COMMON_TIMES = 500 -MERGE_TIMES = 500 -COPY_TIMES = 500 -CONCAT_TIMES = 500 -SET_ABUNDANCES_RANGE = 500 -ZIP_STORAGE_WRITE = 100_000 -ZIP_STORAGE_LOAD = 20 - - -def load_sequences(): - sequences = [] - for _ in range(10): - random_seq = random.sample( - "A,C,G,T".split(",") * RANDOM_SEQ_SIZE, RANDOM_SEQ_NUMBER - ) - sequences.append("".join(random_seq)) - return sequences - - -@pytest.fixture -def mh(): - return MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False) - - -@pytest.fixture -def mh_protein(): - return MinHash(MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False) - - -@pytest.fixture -def sequences(): - return load_sequences() - - -@pytest.fixture -def populated_mh(sequences): - populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False) - for seq in sequences: - populated_mh.add_sequence(seq) - return populated_mh - - -def test_add_sequence(benchmark, mh, sequences): - @benchmark - def bench(): - for seq in sequences: - mh.add_sequence(seq) - - -def test_add_protein(benchmark, mh_protein, sequences): - @benchmark - def bench(): - for seq in sequences: - mh_protein.add_protein(seq) - - -def test_get_mins(benchmark, populated_mh): - benchmark(populated_mh.get_mins) - - -def test_add_hash(benchmark, mh): - @benchmark - def bench(): - for i in range(ADD_HASH_RANGE): - mh.add_hash(i) - - -def test_add_many(benchmark, mh): - benchmark(mh.add_many, list(range(ADD_MANY_RANGE))) - - -def test_similarity(benchmark, mh, populated_mh): - benchmark(mh.similarity, populated_mh) - - -def test_count_common(benchmark, mh, populated_mh): - benchmark(mh.count_common, populated_mh) - - -def test_merge(benchmark, mh, populated_mh): - benchmark(mh.merge, populated_mh) - - -def test_copy(benchmark, populated_mh): - benchmark(populated_mh.__copy__) - - -def test_concat(benchmark, mh, populated_mh): - benchmark(mh.__iadd__, populated_mh) - - -#################### - - -def setup(self): - TimeMinHashSuite.setup(self) - self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True) - - self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True) - for seq in self.sequences: - self.populated_mh.add_sequence(seq) - - -def time_get_mins_abundance(self): - mh = self.populated_mh - for i in range(GET_MINS_RANGE): - mh.get_mins(with_abundance=True) - - -def time_set_abundances(self): - mh = self.mh - mins = self.populated_mh.get_mins(with_abundance=True) - for i in range(SET_ABUNDANCES_RANGE): - mh.set_abundances(mins) - - -def time_set_abundances_noclear(self): - mh = self.mh - mins = self.populated_mh.get_mins(with_abundance=True) - for i in range(SET_ABUNDANCES_RANGE): - mh.set_abundances(mins, clear=False) - - -#################### - - -@pytest.fixture -def zipstore(): - import zipfile - - zf = NamedTemporaryFile() - - with zipfile.ZipFile(zf, mode="w", compression=zipfile.ZIP_STORED) as storage: - for i in range(ZIP_STORAGE_WRITE): - # just so we have lots of entries - storage.writestr(str(i), b"0") - # one big-ish entry - storage.writestr("sig1", b"9" * 1_000_000) - - yield zf - - zf.close() - - -def test_load_from_zipstorage(benchmark, zipstore): - @benchmark - def bench(): - with ZipStorage(zipstore.name) as storage: - for _ in range(ZIP_STORAGE_LOAD): - storage.load("sig1") - - -def test_load_small_from_zipstorage(benchmark, zipstore): - @benchmark - def bench(): - with ZipStorage(zipstore.name) as storage: - for _ in range(ZIP_STORAGE_LOAD): - storage.load("99999") diff --git a/tox.ini b/tox.ini index c1237267da..ecf66a2bcd 100644 --- a/tox.ini +++ b/tox.ini @@ -111,34 +111,6 @@ commands = asv machine --yes asv continuous latest HEAD {posargs} -[testenv:benchmarks] -description = run pytest-benchmark for benchmarking -changedir = {toxinidir} -commands = - pytest \ - --cov "{envsitepackagesdir}/sourmash" \ - --cov-config "{toxinidir}/tox.ini" \ - --cov-report= \ - --junitxml {toxworkdir}/junit.benchmarks.xml \ - --benchmark-only \ - -n 0 \ - {posargs:tests} - -[testenv:codspeed] -description = run codspeed for benchmarking -deps = - pytest-codspeed -changedir = {toxinidir} -commands = - pytest \ - --cov "{envsitepackagesdir}/sourmash" \ - --cov-config "{toxinidir}/tox.ini" \ - --cov-report= \ - --junitxml {toxworkdir}/junit.codspeed.xml \ - --codspeed \ - -k benchmarks \ - {posargs:tests} - [testenv:docs] description = invoke sphinx-build to build the HTML docs basepython = python3.10 @@ -208,7 +180,7 @@ commands = coverage xml -i -o {toxworkdir}/coverage.xml coverage html -i -d {toxworkdir}/htmlcov diff-cover --compare-branch {env:DIFF_AGAINST:origin/latest} {toxworkdir}/coverage.xml -depends = py312, py311, py310, pypy3, codspeed +depends = py312, py311, py310, pypy3 pass_env = {[testenv]pass_env} DIFF_AGAINST set_env = COVERAGE_FILE={toxworkdir}/.coverage @@ -261,7 +233,7 @@ source = src/sourmash/ python = 3.10: py310, docs, package_description, coverage 3.11: py311, coverage - 3.12: py312, coverage, codspeed + 3.12: py312, coverage [flake8] max-complexity = 22