Merge pull request #25 from remkade/python_api_cleanup

Dropping <String> from MatchSpec cleaning up python api
remkade · Apr 7, 2023 · ad2f56a · ad2f56a
2 parents ed7c707 + d00eb41
commit ad2f56a
Show file tree

Hide file tree

Showing 15 changed files with 1,309,923 additions and 195 deletions.
diff --git a/.gitignore b/.gitignore
@@ -3,4 +3,5 @@
 .idea/*
 pvenv/*
 python/*
-main.py
+main.py
+venv/*
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "matchspec"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2021"
 
 [lib]
@@ -20,7 +20,7 @@ version-compare = "0.1"
 criterion = "0.3"
 
 [features]
-default = []
+default = ["python"]
 python = ["pyo3/extension-module"]
 
 [[bench]]

diff --git a/README.md b/README.md
@@ -2,6 +2,50 @@
 
 A Conda MatchSpec implementation in pure Rust. This allows you to parse a matchspec and validate it against a package to see if it matches.
 
+# Python Library
+
+This library exposes a few simple functions:
+
+## `match_against_matchspec()`
+
+Takes a `matchspec` as a `str` and matches it against a `package_name` and `version` (both `str`). Returns a `bool`.
+
+``` python
+import rust_matchspec
+rust_matchspec.match_against_matchspec('python>=3.0', 'python', '3.10.1') # returns True
+```
+
+## `filter_package_list()`
+
+Takes a `list` of `dicts` and returns all the dicts inside that match a given matchspec. The `dicts` must have a `name` key with a `str` value, but all other fields are optional.
+
+```python
+import rust_matchspec
+list = [{'name': 'tensorflow', 'version': '2.10.0'},
+	{'name': 'pytorch', 'version': '2.0.0'},
+	{'name': 'pytorch', 'version': '1.11.1'}]
+
+rust_matchspec.filter_package_list('pytorch>1.12', list) # returns [PackageCandidate(name=pytorch)]
+```
+
+Possible keys:
+
+| Key          | Expected Type | Required? |
+|--------------|---------------|-----------|
+| name         | str           | yes       |
+| version      | str           |           |
+| build        | str           |           |
+| build_number | u32           |           |
+| depends      | [str]         |           |
+| license      | str           |           |
+| md5          | str           |           |
+| sha256       | str           |           |
+| size         | u64           |           |
+| subdir       | str           |           |
+| timestamp    | u64           |           |
+
+# Rust Library
+
 ## Example
 
 The way you instantiate a MatchSpec is by parsing a string into the type:
@@ -10,7 +54,7 @@ The way you instantiate a MatchSpec is by parsing a string into the type:
 use rust_matchspec::{CompoundSelector, MatchSpec, Selector};
 
 // Create the MatchSpec by parsing a String or &str
-let matchspec: MatchSpec<String> = "main/linux-64::pytorch>1.10.2".parse().unwrap();
+let matchspec: MatchSpec = "main/linux-64::pytorch>1.10.2".parse().unwrap();
 
 // You then have the data accessible inside the MatchSpec struct if you want it
 // Package name is the only mandatory field in a matchspec
@@ -33,11 +77,33 @@ assert!(matchspec.is_package_version_match(&"pytorch", &"1.11.0"))
 
 ## Benchmarking
 
-This library contains benchmarks aimed at checking the speed of our implementation against other languages and ensure speed doesn't regress. This is a pure Rust benchmark so you'll need to view it with some skepticism if you want to compare this implementation against others. Benchmark harnesses and the data all need to be identical for a benchmark to really provide value.
+This library contains benchmarks aimed at checking the speed of our implementation against other languages and ensuring speed doesn't regress. These are contrived benchmarks to test raw speed, so take them (and all benchmarks) with a bit of skepticism. Benchmark harnesses and the data all need to be identical for a benchmark to really provide value.
+
+
+### Python
+
+The Python benchmarks use [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/stable/).
+
+Steps to run the benchmarks:
+
+```bash
+# Setup the conda env
+conda env create -f ./environment.yml
+conda activate rust_matchspec
+
+# Build an optimized wheel
+maturin build --release
+
+# install it
+pip install ./target/wheels/rust_matchspec*.whl
+
+# Finally, run the benchmark
+pytest
+```
 
-### Running the benchmarks
+### Rust
 
-These benchmarks use [Criterion.rs](https://bheisler.github.io/criterion.rs/book/criterion_rs.html) to provide the benchmarking framework. Its pretty easy to run the benchmarks on stable rust:
+The Rust benchmarks use [Criterion.rs](https://bheisler.github.io/criterion.rs/book/criterion_rs.html) to provide the benchmarking framework. It's pretty easy to run the benchmarks on stable Rust:
 
 ```bash
 cargo bench 

diff --git a/benches/parsing.rs b/benches/parsing.rs
@@ -7,20 +7,20 @@ fn criterion_benchmark(c: &mut Criterion) {
     c.bench_function("Package name only", |b| {
         b.iter(|| {
             // This is a complex but not unlikely matchspec
-            black_box("tzdata").parse::<MatchSpec<String>>()
+            black_box("tzdata").parse::<MatchSpec>()
         })
     });
     c.bench_function("Package name and version", |b| {
         b.iter(|| {
             // This is a complex but not unlikely matchspec
-            black_box("openssl>1.1.1g").parse::<MatchSpec<String>>()
+            black_box("openssl>1.1.1g").parse::<MatchSpec>()
         })
     });
     c.bench_function("All possible matchers", |b| {
         b.iter(|| {
             // This is a complex but not unlikely matchspec
             black_box("conda-forge/linux-64:NAMESPACE:tensorflow>=1.9.2[license=\"GPL\", subdir=\"linux-64\"]")
-                .parse::<MatchSpec<String>>()
+                .parse::<MatchSpec>()
         })
     });
 
@@ -37,7 +37,7 @@ fn criterion_benchmark(c: &mut Criterion) {
             .collect();
         b.iter(|| {
             for d in &depends {
-                d.parse::<MatchSpec<String>>().unwrap();
+                d.parse::<MatchSpec>().unwrap();
             }
         })
     });

diff --git a/benches/test_python.py b/benches/test_python.py
@@ -0,0 +1,84 @@
+from conda.models.match_spec import MatchSpec
+from pathlib import Path
+import rust_matchspec
+import json
+
+test_data = Path('./test_data')
+depends_file = test_data / 'linux_64-depends.txt'
+repodata_file = test_data / 'repodata-linux-64.json'
+
+
+def bench_match_against_matchspec(list: [str]):
+    """ Takes the list of matchspecs and matches against python 3.9.1 """
+    for item in list:
+        rust_matchspec.match_against_matchspec(item, 'python', '3.9.1')
+
+
+def test_rust_matchspec_on_repodata_depends(benchmark):
+    """
+    Test the rust_matchspec.match_against_matchspec using the
+    linux_64-depends.txt file.
+    """
+    with open(depends_file) as f:
+        depends = f.readlines()
+
+    benchmark(bench_match_against_matchspec, list=depends)
+
+
+def bench_conda_against_repodata_depends(list: [str]):
+    """
+    Runs a list of matchspecs against a static package, this is a little
+    contrived, but it is meant to compare the instantiation and filtering speed
+    of MatchSpec
+    """
+    for item in list:
+        ms = MatchSpec(item)
+        ms.match({'name': 'python', 'version': '3.9.1',
+                 'build': 'hbdb9e5c_0', 'build_number': 0})
+
+
+def test_conda_matchspec_on_repodata_depends(benchmark):
+    """
+    Test Conda's MatchSpec against the linux_64-depends.txt file
+    """
+    with open(depends_file) as f:
+        depends = f.readlines()
+
+    benchmark(bench_conda_against_repodata_depends, list=depends)
+
+
+def bench_rust_matchspec_filter_package_list(list: [dict[str, str]]):
+    """
+    Runs rust_matchspec.filter_package_list() against a list of packages
+    """
+    _matches = rust_matchspec.filter_package_list('python>=3.9.1', list)
+
+
+def test_rust_matchspec_filter_package_list(benchmark):
+    """
+    Test rust_matchspec's filter_package_list() against the full linux-64
+    repodata.json from Anaconda's defaults.
+    """
+    with open(repodata_file) as f:
+        repodata = list(json.load(f)['packages'].values())
+
+    benchmark(bench_rust_matchspec_filter_package_list, list=repodata)
+
+
+def bench_conda_filter_package_list(list: [dict[str, str]]):
+    """
+    Uses MatchSpec against a list of packages to filter out non-matches
+    """
+    ms = MatchSpec('python>=3.9.1')
+    _matches = [p for p in list if ms.match(p)]
+
+
+def test_conda_filter_package_list(benchmark):
+    """
+    Benchmark conda MatchSpec filtering all of the linux-64 repodata from
+    Anaconda's defaults
+    """
+    with open(repodata_file) as f:
+        repodata = list(json.load(f)['packages'].values())
+
+    benchmark(bench_conda_filter_package_list, list=repodata)
diff --git a/environment.yml b/environment.yml
@@ -0,0 +1,8 @@
+---
+name: rust_matchspec
+channels:
+  - defaults
+dependencies:
+  - conda
+  - pytest
+  - pytest-benchmark
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,9 +4,14 @@ requires = ["maturin>=0.14,<0.15"]
 
 [project]
 name = "rust_matchspec"
-version = "0.1.1"
+version = "0.2.0"
 description = "A conda matchspec written in Rust"
 requires-python = ">=3.7"
+classifiers = [
+    "Programming Language :: Rust",
+    "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Python :: Implementation :: PyPy",
+]
 
 [tool.maturin]
 features = ["python"]

diff --git a/python/rust_matchspec/__init__.py b/python/rust_matchspec/__init__.py
@@ -1,4 +1,4 @@
-from rust_matchspec import rust_matchspec
+from .rust_matchspec import *
 
-__name__="rust_matchspec"
-__version__="0.1.0"
+__name__ = "rust_matchspec"
+__version__ = "0.2.0"
diff --git a/src/error.rs b/src/error.rs
@@ -12,4 +12,3 @@ impl Display for MatchSpecError {
         write!(f, "{}", self.message)
     }
 }
-
diff --git a/src/lib.rs b/src/lib.rs
@@ -1,11 +1,10 @@
 #![doc = include_str ! ("../README.md")]
 
-pub mod matchspec;
+pub mod error;
 mod input_table;
-mod parsers;
+pub mod matchspec;
 pub mod package_candidate;
-pub mod error;
-#[cfg(feature = "python")]
+mod parsers;
 pub mod python;
 
 pub use crate::matchspec::*;
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,4 +3,5 @@ @@
     .idea/*
     pvenv/*
     python/*
-    main.py
+    main.py
+    venv/*
Original file line number	Diff line number	Diff line change
Expand Up		@@ -12,4 +12,3 @@ impl Display for MatchSpecError {
		write!(f, "{}", self.message)
		}
		}