diff --git a/.gitignore b/.gitignore
index 84d036c2..a7202bec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -124,3 +124,9 @@ pip-wheel-metadata/
 mprofile*.dat
 *.DS_Store
+
+# asv
+.asv
+benchmarks/results
+benchmarks/html
+benchmarks/env
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 88167935..3831c7d8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -24,6 +24,7 @@ repos:
     hooks:
       - id: mypy
         args: [--config-file, pyproject.toml]
+        exclude: benchmarks/benchmarks/tools/IO.py
         additional_dependencies:
           - types-setuptools
           - types-requests
diff --git a/benchmarks/README.md b/benchmarks/README.md
index af1352ad..04a355ed 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -1,12 +1,39 @@
-# Benchmarks
-`detect_and_classify.py` contains a simple script that runs
-detection and classification with the small test dataset.
-
-## Memory
-[memory_profiler](https://github.com/pythonprofilers/memory_profiler)
-can be used to profile memory useage. Install, and then run
-`mprof run --include-children --multiprocess detect_and_classify.py`. It is **very**
-important to use these two flags to capture memory usage by the additional
-processes that cellfinder_core uses.
-
-To show the results of the latest profile run, run `mprof plot`.
+# Benchmarking with asv
+[Install asv](https://asv.readthedocs.io/en/stable/installing.html) by running:
+```
+pip install asv
+```
+
+`asv` works roughly as follows:
+1. It creates a virtual environment (as defined in the config)
+2. It installs the version of the software package at a specific commit (which may be a local commit)
+3. It times the benchmarking tests and saves the results to JSON files
+4. The JSON files are 'published' into an HTML directory
+5. The HTML directory can be viewed as a static website
+
+## Running benchmarks
+To run benchmarks on a specific commit:
+```
+$ asv run 88fbbc33^!
+```
+
+To run them on all commits up to (and including) a specific one:
+```
+$ asv run 88fbbc33
+```
+
+To run them on a range of commits:
+```
+$ asv run 827f322b..729abcf3
+```
+
+To collate the benchmarks' results into a viewable website:
+```
+$ asv publish
+```
+This will create a tree of files in the `html` directory. These files cannot be viewed directly from the local filesystem; they need to be served as a static site. `asv publish` also detects statistically significant performance regressions, which can be inspected in the 'Regressions' tab of the static site.
+
+To serve the static site locally and view the results:
+```
+$ asv preview
+```
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
new file mode 100644
index 00000000..1a4e32d9
--- /dev/null
+++ b/benchmarks/asv.conf.json
@@ -0,0 +1,188 @@
+{
+    // The version of the config file format. Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+
+    // The name of the project being benchmarked
+    "project": "cellfinder-core",
+
+    // The project's homepage
+    "project_url": "https://brainglobe.info/documentation/cellfinder/index.html",
+
+    // The URL or local path of the source code repository for the
+    // project being benchmarked
+    // To use the upstream repository: uncomment the 1st line (and comment the 2nd)
+    // To use the local repository: comment the 1st line (and uncomment the 2nd)
+    //"repo": "https://github.com/brainglobe/cellfinder-core.git",
+    "repo": "..",
+
+    // The Python project's subdirectory in your repo. If missing or
+    // the empty string, the project is assumed to be located at the root
+    // of the repository (where setup.py is located)
+    // "repo_subdir": "",
+
+    // Customizable commands for building, installing, and
+    // uninstalling the project. See asv.conf.json documentation.
+    //
+    "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"],
+    "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
+    "build_command": [
+        "python -m pip install build",
+        "python -m build",
+        "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
+    ],
+
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "default" (for mercurial).
+    "branches": ["main"], // for git
+    // "branches": ["default"], // for mercurial
+
+    // The DVCS being used. If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    // "dvcs": "git",
+
+    // The tool to use to create environments. May be "conda",
+    // "virtualenv" or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "conda",
+
+    // timeout in seconds for installing any dependencies in environment
+    // defaults to 10 min
+    //"install_timeout": 600,
+
+    // the base URL to show a commit for the project.
+    "show_commit_url": "http://github.com/brainglobe/cellfinder-core/commit/",
+
+    // The Pythons you'd like to test against. If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    "pythons": ["3.10"], // same as pyproject.toml? ["3.8", "3.9", "3.10"]
+
+    // The list of conda channel names to be searched for benchmark
+    // dependency packages in the specified order
+    "conda_channels": ["conda-forge", "defaults"],
+
+    // A conda environment file that is used for environment creation.
+    // "conda_environment_file": "environment.yml",
+
+    // The matrix of dependencies to test. Each key of the "req"
+    // requirements dictionary is the name of a package (in PyPI) and
+    // the values are version numbers. An empty list or empty string
+    // indicates to just test against the default (latest)
+    // version. null indicates that the package is to not be
+    // installed. If the package to be tested is only available from
+    // PyPI, and the 'environment_type' is conda, then you can preface
+    // the package name by 'pip+', and the package will be installed
+    // via pip (with all the conda available packages installed first,
+    // followed by the pip installed packages).
+    //
+    // The ``@env`` and ``@env_nobuild`` keys contain the matrix of
+    // environment variables to pass to build and benchmark commands.
+    // An environment will be created for every combination of the
+    // cartesian product of the "@env" variables in this matrix.
+    // Variables in "@env_nobuild" will be passed to every environment
+    // during the benchmark phase, but will not trigger creation of
+    // new environments. A value of ``null`` means that the variable
+    // will not be set for the current combination.
+    //
+    "matrix": {
+        "req": {},
+        // "napari": ["", null], // test with and without
+        // // "six": ["", null], // test with and without six installed
+        // // "pip+emcee": [""] // emcee is only available for install with pip.
+        // },
+        // "env": {"ENV_VAR_1": ["val1", "val2"]},
+        // "env_nobuild": {"ENV_VAR_2": ["val3", null]},
+    },
+
+    // Combinations of libraries/python versions can be excluded/included
+    // from the set to test. Each entry is a dictionary containing additional
+    // key-value pairs to include/exclude.
+    //
+    // An exclude entry excludes entries where all values match. The
+    // values are regexps that should match the whole string.
+    //
+    // An include entry adds an environment. Only the packages listed
+    // are installed. The 'python' key is required. The exclude rules
+    // do not apply to includes.
+    //
+    // In addition to package names, the following keys are available:
+    //
+    // - python
+    //     Python version, as in the *pythons* variable above.
+    // - environment_type
+    //     Environment type, as above.
+    // - sys_platform
+    //     Platform, as in sys.platform. Possible values for the common
+    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
+    // - req
+    //     Required packages
+    // - env
+    //     Environment variables
+    // - env_nobuild
+    //     Non-build environment variables
+    //
+    // "exclude": [
+    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
+    //     {"environment_type": "conda", "req": {"six": null}}, // don't run without six on conda
+    //     {"env": {"ENV_VAR_1": "val2"}}, // skip val2 for ENV_VAR_1
+    // ],
+    //
+    // "include": [
+    //     // additional env for python2.7
+    //     {"python": "2.7", "req": {"numpy": "1.8"}, "env_nobuild": {"FOO": "123"}},
+    //     // additional env if run on windows+conda
+    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "req": {"libpython": ""}},
+    // ],
+
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in. If not provided, defaults to "benchmarks"
+    "benchmark_dir": "benchmarks",
+
+    // The directory (relative to the current directory) to cache the Python
+    // environments in. If not provided, defaults to "env"
+    "env_dir": "env",
+
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in. If not provided, defaults to "results".
+    "results_dir": "results",
+
+    // The directory (relative to the current directory) that the html tree
+    // should be written to. If not provided, defaults to "html".
+    "html_dir": "html",
+
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+
+    // `asv` will cache results of the recent builds in each
+    // environment, making them faster to install next time. This is
+    // the number of builds to keep, per environment.
+    "build_cache_size": 2,
+
+    // The commits after which the regression search in `asv publish`
+    // should start looking for regressions. Dictionary whose keys are
+    // regexps matching to benchmark names, and values corresponding to
+    // the commit (exclusive) after which to start looking for
+    // regressions. The default is to start from the first commit
+    // with results. If the commit is `null`, regression detection is
+    // skipped for the matching benchmark.
+    //
+    // "regressions_first_commits": {
+    //     "some_benchmark": "352cdf", // Consider regressions only after this commit
+    //     "another_benchmark": null, // Skip regression detection altogether
+    // },
+
+    // The thresholds for relative change in results, after which `asv
+    // publish` starts reporting regressions. Dictionary of the same
+    // form as in ``regressions_first_commits``, with values
+    // indicating the thresholds. If multiple entries match, the
+    // maximum is taken. If no entry matches, the default is 5%.
+ // + // "regressions_thresholds": { + // "some_benchmark": 0.01, // Threshold of 1% + // "another_benchmark": 0.5, // Threshold of 50% + // }, +} diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/benchmarks/imports.py b/benchmarks/benchmarks/imports.py new file mode 100644 index 00000000..d7ef25cc --- /dev/null +++ b/benchmarks/benchmarks/imports.py @@ -0,0 +1,43 @@ +# ------------------------------------ +# Runtime benchmarks +# ------------------------------------ +def timeraw_import_main(): + return """ + from cellfinder_core.main import main + """ + + +def timeraw_import_io_dask(): + return """ + from cellfinder_core.tools.IO import read_with_dask + """ + + +def timeraw_import_io_tiff_meta(): + return """ + from cellfinder_core.tools.IO import get_tiff_meta + """ + + +def timeraw_import_prep_tensorflow(): + return """ + from cellfinder_core.tools.prep import prep_tensorflow + """ + + +def timeraw_import_prep_models(): + return """ + from cellfinder_core.tools.prep import prep_models + """ + + +def timeraw_import_prep_classification(): + return """ + from cellfinder_core.tools.prep import prep_classification + """ + + +def timeraw_import_prep_training(): + return """ + from cellfinder_core.tools.prep import prep_training + """ diff --git a/benchmarks/benchmarks/tools/IO.py b/benchmarks/benchmarks/tools/IO.py new file mode 100644 index 00000000..6bc56057 --- /dev/null +++ b/benchmarks/benchmarks/tools/IO.py @@ -0,0 +1,64 @@ +from pathlib import Path + +from cellfinder_core.tools.IO import get_tiff_meta, read_with_dask + +CELLFINDER_CORE_PATH = Path(__file__).parents[3] +TESTS_DATA_INTEGRATION_PATH = ( + Path(CELLFINDER_CORE_PATH) / "tests" / "data" / "integration" +) + + +class Read: + # ------------------------------------ + # Data + # ------------------------------ + detection_crop_planes_ch0 = TESTS_DATA_INTEGRATION_PATH / Path( + "detection", "crop_planes", "ch0" + ) + detection_crop_planes_ch1 = TESTS_DATA_INTEGRATION_PATH / Path( + "detection", "crop_planes", "ch1" + ) + cells_tif_files = list( + Path(TESTS_DATA_INTEGRATION_PATH, "training", "cells").glob("*.tif") + ) + non_cells_tif_files = list( + Path(TESTS_DATA_INTEGRATION_PATH, "training", "non_cells").glob( + "*.tif" + ) + ) + + # --------------------------------------------- + # Setup function + # -------------------------------------------- + def setup(self, subdir): + self.data_dir = str(subdir) + + # --------------------------------------------- + # Reading 3d arrays with dask + # -------------------------------------------- + def time_read_with_dask(self, subdir): + read_with_dask(self.data_dir) + + # parameters to sweep across + time_read_with_dask.param_names = [ + "tests_data_integration_subdir", + ] + time_read_with_dask.params = ( + [detection_crop_planes_ch0, detection_crop_planes_ch1], + ) + + # ----------------------------------------------- + # Reading metadata from tif files + # ------------------------------------------------- + def time_get_tiff_meta( + self, + subdir, + ): + get_tiff_meta(self.data_dir) + + # parameters to sweep across + time_get_tiff_meta.param_names = [ + "tests_data_integration_tiffile", + ] + + time_get_tiff_meta.params = cells_tif_files + non_cells_tif_files diff --git a/benchmarks/benchmarks/tools/__init__.py b/benchmarks/benchmarks/tools/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/benchmarks/tools/prep.py b/benchmarks/benchmarks/tools/prep.py 
new file mode 100644
index 00000000..09e1e755
--- /dev/null
+++ b/benchmarks/benchmarks/tools/prep.py
@@ -0,0 +1,77 @@
+import shutil
+from pathlib import Path
+
+from brainglobe_utils.general.system import get_num_processes
+
+from cellfinder_core.tools.prep import (
+    prep_classification,
+    prep_models,
+    prep_tensorflow,
+    prep_training,
+)
+
+
+class PrepModels:
+    # parameters to sweep across
+    param_names = ["model_name"]
+    params = ["resnet50_tv", "resnet50_all"]
+
+    # increase default timeout to allow for download
+    timeout = 600
+
+    # install path
+    def benchmark_install_path(self):
+        # also allow to run as "user" on GH actions?
+        return Path(Path.home() / ".cellfinder-benchmarks")
+
+    def setup(self, model_name):
+        self.n_free_cpus = 2
+        self.n_processes = get_num_processes(
+            min_free_cpu_cores=self.n_free_cpus
+        )
+        self.trained_model = None
+        self.model_weights = None
+        self.install_path = self.benchmark_install_path()
+        self.model_name = model_name
+
+        # remove .cellfinder-benchmarks dir if it exists
+        shutil.rmtree(self.install_path, ignore_errors=True)
+
+    def teardown(self, model_name):
+        # remove .cellfinder-benchmarks dir after benchmarks
+        shutil.rmtree(self.install_path)
+
+    def time_prep_models(self, model_name):
+        prep_models(
+            self.trained_model,
+            self.model_weights,
+            self.install_path,
+            model_name,
+        )
+
+    def time_prep_classification(self, model_name):
+        prep_classification(
+            self.trained_model,
+            self.model_weights,
+            self.install_path,
+            model_name,
+            self.n_free_cpus,
+        )
+
+    def time_prep_training(self, model_name):
+        prep_training(
+            self.n_free_cpus,
+            self.trained_model,
+            self.model_weights,
+            self.install_path,
+            model_name,
+        )
+
+
+class PrepTF:
+    def setup(self):
+        n_free_cpus = 2
+        self.n_processes = get_num_processes(min_free_cpu_cores=n_free_cpus)
+
+    def time_prep_tensorflow(self):
+        prep_tensorflow(self.n_processes)
diff --git a/benchmarks/mem_benchmarks/README.md b/benchmarks/mem_benchmarks/README.md
new file mode 100644
index 00000000..af1352ad
--- /dev/null
+++ b/benchmarks/mem_benchmarks/README.md
@@ -0,0 +1,12 @@
+# Benchmarks
+`detect_and_classify.py` contains a simple script that runs
+detection and classification with the small test dataset.
+
+## Memory
+[memory_profiler](https://github.com/pythonprofilers/memory_profiler)
+can be used to profile memory usage. Install it, and then run
+`mprof run --include-children --multiprocess detect_and_classify.py`. It is **very**
+important to use these two flags to capture memory usage by the additional
+processes that cellfinder_core uses.
+
+To show the results of the latest profile run, run `mprof plot`.
diff --git a/benchmarks/detect_and_classify.py b/benchmarks/mem_benchmarks/detect_and_classify.py
similarity index 100%
rename from benchmarks/detect_and_classify.py
rename to benchmarks/mem_benchmarks/detect_and_classify.py
diff --git a/benchmarks/filter_2d.py b/benchmarks/mem_benchmarks/filter_2d.py
similarity index 100%
rename from benchmarks/filter_2d.py
rename to benchmarks/mem_benchmarks/filter_2d.py
diff --git a/benchmarks/filter_3d.py b/benchmarks/mem_benchmarks/filter_3d.py
similarity index 100%
rename from benchmarks/filter_3d.py
rename to benchmarks/mem_benchmarks/filter_3d.py
diff --git a/pyproject.toml b/pyproject.toml
index f432275c..9482f3f1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -123,13 +123,14 @@ module = [
     "scipy.*",
     "skimage.*",
     "sklearn.*",
+    "cellfinder_core.tools.prep.*",
 ]
 ignore_missing_imports = true
 
 [[tool.mypy.overrides]]
 module = [
     "cellfinder_core.detect.*",
-    "cellfinder_core.classify.*"
+    "cellfinder_core.classify.*",
 ]
 disallow_untyped_defs = true
 disallow_incomplete_defs = true
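A note on the asv conventions the new benchmark files rely on: asv collects benchmarks by name prefix (`time_*` for wall-clock timing, `peakmem_*` for peak memory), calls `setup()` untimed before each measurement, and repeats every benchmark for each value in `params`, reported under the names in `param_names`. The sketch below is illustrative only and not part of this diff; `ExampleSuite` and its methods are invented for the example.

```python
import numpy as np


class ExampleSuite:
    # one sub-list per name in param_names; asv runs each benchmark
    # once per parameter value (or per cartesian-product combination)
    param_names = ["n_elements"]
    params = [[1_000, 100_000]]

    # per-benchmark attributes such as `timeout` (in seconds) can be
    # set on the class, as PrepModels does above for model downloads
    timeout = 60

    def setup(self, n_elements):
        # runs before each measurement and is not timed
        self.data = np.arange(n_elements)

    def time_sum(self, n_elements):
        # timed by asv and reported as ExampleSuite.time_sum
        self.data.sum()

    def peakmem_sum(self, n_elements):
        # asv reports the peak resident memory while this body runs
        self.data.sum()
```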
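The `timeraw_*` functions in `benchmarks/benchmarks/imports.py` follow a different convention: instead of executing code, they return it as a string, and asv runs that string in a fresh Python subprocess. This matters for import benchmarks, since a module already cached in `sys.modules` would make repeated import timings meaningless. A minimal sketch of the pattern (the indented triple-quoted string follows the style of the `timeraw` examples in the asv docs; `timeraw_import_numpy` is an invented name):

```python
def timeraw_import_numpy():
    # the returned code is executed in a new interpreter, so numpy is
    # not yet in sys.modules when the import is timed
    return """
    import numpy
    """
```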
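On the memory side, the `mem_benchmarks` README above profiles a whole run with `mprof`. For per-line detail inside a single function, memory_profiler's `@profile` decorator can be used as well; the following is a hypothetical, self-contained example (`make_volume` is not a cellfinder_core API) that prints a line-by-line memory table when run with memory_profiler installed.

```python
import numpy as np
from memory_profiler import profile


@profile  # prints per-line memory usage/increment for this function
def make_volume():
    # allocate 100 image planes, then stack them into one 3D array;
    # stacking copies the data, so peak memory roughly doubles here
    planes = [np.zeros((512, 512), dtype=np.uint16) for _ in range(100)]
    volume = np.stack(planes)
    return volume.mean()


if __name__ == "__main__":
    make_volume()
```

Functions decorated this way should also show up as bracketed spans in the `mprof plot` output when the script is run via `mprof run`.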