diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml index 5a194ebf..4de3a200 100644 --- a/.github/workflows/test_and_deploy.yml +++ b/.github/workflows/test_and_deploy.yml @@ -32,20 +32,22 @@ jobs: - uses: neuroinformatics-unit/actions/check_manifest@v2 test: - needs: [linting,manifest] + needs: [linting, manifest] + name: ${{ matrix.os }} py${{ matrix.python-version }} runs-on: ${{ matrix.os }} env: INPUT_COREDEV: ${{ github.event.inputs.coredev }} strategy: fail-fast: false matrix: - # Run across a mixture of Python versions and operating systems + # Run tests on ubuntu across all supported versions + python-version: ["3.9", "3.10"] + os: [ubuntu-latest] + # Include at least one MacOS and Windows test include: - os: macos-latest python-version: "3.10" - os: windows-latest - python-version: "3.9" - - os: ubuntu-latest python-version: "3.10" steps: - uses: neuroinformatics-unit/actions/test@v2 @@ -60,7 +62,8 @@ jobs: steps: - uses: neuroinformatics-unit/actions/build_sdist_wheels@v2 - deploy: + upload_all: + name: Publish build distributions needs: [build_sdist_wheels] runs-on: ubuntu-latest if: github.event_name == 'push' && github.ref_type == 'tag' @@ -75,7 +78,7 @@ jobs: password: ${{ secrets.TWINE_API_KEY }} build-and-push-docker: - needs: deploy + needs: [upload_all] runs-on: ubuntu-latest permissions: contents: read @@ -83,7 +86,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Log in to the Container registry uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 diff --git a/.gitignore b/.gitignore index 8c1f8334..56a96743 100644 --- a/.gitignore +++ b/.gitignore @@ -77,10 +77,17 @@ doc/build/ _build/ mkdocs.yml +# MkDocs documentation +site/ + # PyBuilder target/ -# Jupyter Notebook +# Pycharm and VSCode +.idea/ +.vscode/ + +# IPython Notebook .ipynb_checkpoints # pyenv @@ -109,18 +116,16 @@ venv.bak/ # Rope project settings .ropeproject -# mkdocs 
documentation -/site - # mypy .mypy_cache/ -.idea/ - *.~lock.* pip-wheel-metadata/ -*.DS_Store +# OS +.DS_Store -*.vscode/ +# written by setuptools_scm +**/_version.py +benchmarks/results/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5e015b5c..7e44b7b1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,3 +30,9 @@ repos: additional_dependencies: - types-setuptools - types-requests + - repo: https://github.com/mgedmin/check-manifest + rev: "0.49" + hooks: + - id: check-manifest + args: [--no-build-isolation] + additional_dependencies: [setuptools-scm] diff --git a/LICENSE b/LICENSE index 3e221c2a..77f53359 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2020, University College London +Copyright (c) 2023, BrainGlobe developers. All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/MANIFEST.in b/MANIFEST.in index f99c19d9..0b11081e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,16 +1,23 @@ include README.md include LICENSE +include brainglobe_workflows/cellfinder/default_config.json +exclude .pre-commit-config.yaml exclude *.yaml exclude *.yml exclude Dockerfile exclude *.ini +exclude asv.conf.json recursive-include brainglobe_workflows *.py -include brainglobe_workflows/config/* + +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] global-include *.pxd +prune benchmarks +prune docs prune tests prune resources diff --git a/README.md b/README.md index b831d0dc..78db778b 100644 --- a/README.md +++ b/README.md @@ -1,150 +1,139 @@ -[![Python Version](https://img.shields.io/pypi/pyversions/cellfinder.svg)](https://pypi.org/project/cellfinder) -[![PyPI](https://img.shields.io/pypi/v/cellfinder.svg)](https://pypi.org/project/cellfinder) -[![Downloads](https://pepy.tech/badge/cellfinder)](https://pepy.tech/project/cellfinder) -[![Wheel](https://img.shields.io/pypi/wheel/cellfinder.svg)](https://pypi.org/project/cellfinder) 
-[![Development Status](https://img.shields.io/pypi/status/cellfinder.svg)](https://github.com/brainglobe/cellfinder) -[![Tests](https://img.shields.io/github/workflow/status/brainglobe/cellfinder/tests)]( - https://github.com/brainglobe/cellfinder/actions) -[![codecov](https://codecov.io/gh/brainglobe/cellfinder/branch/master/graph/badge.svg?token=s3MweEFPhl)](https://codecov.io/gh/brainglobe/cellfinder) +[![Python Version](https://img.shields.io/pypi/pyversions/brainglobe-workflows.svg)](https://pypi.org/project/brainglobe-workflows) +[![PyPI](https://img.shields.io/pypi/v/brainglobe-workflows.svg)](https://pypi.org/project/brainglobe-workflows) +[![Downloads](https://pepy.tech/badge/brainglobe-workflows)](https://pepy.tech/project/brainglobe-workflows) +[![Wheel](https://img.shields.io/pypi/wheel/brainglobe-workflows.svg)](https://pypi.org/project/brainglobe-workflows) +[![Development Status](https://img.shields.io/pypi/status/brainglobe-workflows.svg)](https://github.com/brainglobe/brainglobe-workflows) +[![Tests](https://img.shields.io/github/workflow/status/brainglobe/brainglobe-workflows/tests)]( + https://github.com/brainglobe/brainglobe-workflows/actions) +[![codecov](https://codecov.io/gh/brainglobe/brainglobe-workflows/branch/master/graph/badge.svg?token=s3MweEFPhl)](https://codecov.io/gh/brainglobe/brainglobe-workflows) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black) [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/) [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit) -[![Contributions](https://img.shields.io/badge/Contributions-Welcome-brightgreen.svg)](https://docs.brainglobe.info/cellfinder/contributing) 
-[![Website](https://img.shields.io/website?up_message=online&url=https%3A%2F%2Fbrainglobe.info)](https://brainglobe.info/documentation/cellfinder/index.html) +[![Contributions](https://img.shields.io/badge/Contributions-Welcome-brightgreen.svg)](https://brainglobe.info/developers/index.html) +[![Website](https://img.shields.io/website?up_message=online&url=https%3A%2F%2Fbrainglobe.info)](https://brainglobe.info/documentation/brainglobe-workflows/index.html) [![Twitter](https://img.shields.io/twitter/follow/brain_globe?style=social)](https://twitter.com/brain_globe) +# BrainGlobe Workflows + +`brainglobe-workflows` is a package that provides users with a number of out-of-the-box data analysis workflows employed in neuroscience, implemented using BrainGlobe tools. + +At present, the package offers the following workflows: + +- [cellfinder](#cellfinder): Whole-brain detection, registration, and analysis. The successor to the old [cellfinder CLI](TODO: permalink to deprecated cellfinder tag on repo) TODO: rename tool appropriately and give flavour text + +## Installation + +`brainglobe-workflows` comes packaged with version 1 of BrainGlobe, so the easiest way to make sure you get the latest release and stay up to date is to install that package - [follow this link to see the install instructions](TODO: link me!). + +If you want to install BrainGlobe workflows as a standalone tool, you can run `pip install` in your desired environment: + +```bash +pip install brainglobe-workflows +``` + +## Contributing + +Contributions to BrainGlobe are more than welcome. +Please see the [developers guide](https://brainglobe.info/developers/index.html).
+ +## Citing `brainglobe-workflows` + +**If you use any tools in the [brainglobe suite](https://brainglobe.info/documentation/index.html), please [let us know](mailto:code@adamltyson.com?subject=cellfinder), and we'd be happy to promote your paper/talk etc.** + +If you find [`cellfinder`](#cellfinder) useful, and use it in your research, please cite the paper outlining the cell detection algorithm: +> Tyson, A. L., Rousseau, C. V., Niedworok, C. J., Keshavarzi, S., Tsitoura, C., Cossell, L., Strom, M. and Margrie, T. W. (2021) “A deep learning algorithm for 3D cell detection in whole mouse brain image datasets” PLOS Computational Biology, 17(5), e1009074 +[https://doi.org/10.1371/journal.pcbi.1009074](https://doi.org/10.1371/journal.pcbi.1009074) +> +If you use any of the image registration functions in `cellfinder`, please also cite [`brainreg`](https://github.com/brainglobe/brainreg#citing-brainreg). + +--- + # Cellfinder + +**TODO: move this information to an appropriate place on the website** + Whole-brain cell detection, registration and analysis. -**N.B. If you want to just use the cell detection part of cellfinder, please -see the standalone [cellfinder-core](https://github.com/brainglobe/cellfinder-core) -package, or the [cellfinder plugin](https://github.com/brainglobe/cellfinder-napari) -for [napari](https://napari.org/).** +**N.B.
If you want to just use the cell detection part of cellfinder, please see the standalone [cellfinder-core](https://github.com/brainglobe/cellfinder-core) package, or the [cellfinder plugin](https://github.com/brainglobe/cellfinder-napari) for [napari](https://napari.org/).** ---- `cellfinder` is a collection of tools developed by [Adam Tyson](https://github.com/adamltyson), [Charly Rousseau](https://github.com/crousseau) and [Christian Niedworok](https://github.com/cniedwor) in the [Margrie Lab](https://www.sainsburywellcome.org/web/groups/margrie-lab), generously supported by the [Sainsbury Wellcome Centre](https://www.sainsburywellcome.org/web/). -`cellfinder` is a designed for the analysis of whole-brain imaging data such as - [serial-section imaging](https://sainsburywellcomecentre.github.io/OpenSerialSection/) - and lightsheet imaging in cleared tissue. The aim is to provide a single solution for: - - * Cell detection (initial cell candidate detection and refinement using - deep learning) (using [cellfinder-core](https://github.com/brainglobe/cellfinder-core)) - * Atlas registration (using [brainreg](https://github.com/brainglobe/brainreg)) - * Analysis of cell positions in a common space +`cellfinder` is designed for the analysis of whole-brain imaging data such as [serial-section imaging](https://sainsburywellcomecentre.github.io/OpenSerialSection/) and lightsheet imaging in cleared tissue. +The aim is to provide a single solution for: - --- -Installation is with -`pip install cellfinder` +- Cell detection (initial cell candidate detection and refinement using deep learning) (using [cellfinder-core](https://github.com/brainglobe/cellfinder-core)), +- Atlas registration (using [brainreg](https://github.com/brainglobe/brainreg)), +- Analysis of cell positions in a common space.
---- Basic usage: + ```bash cellfinder -s signal_images -b background_images -o output_dir --metadata metadata ``` -Full documentation can be -found [here](https://brainglobe.info/documentation/cellfinder/index.html). -This software is at a very early stage, and was written with our data in mind. -Over time we hope to support other data types/formats. If you have any issues, please get in touch [on the forum](https://forum.image.sc/tag/brainglobe) or by -[raising an issue](https://github.com/brainglobe/cellfinder/issues/new/choose). +Full documentation can be found [here](https://brainglobe.info/documentation/cellfinder/index.html). +This software is at a very early stage, and was written with our data in mind. +Over time we hope to support other data types/formats. +If you have any issues, please get in touch [on the forum](https://forum.image.sc/tag/brainglobe) or by [raising an issue](https://github.com/brainglobe/cellfinder/issues/new/choose). ---- ## Illustration ### Introduction -cellfinder takes a stitched, but otherwise raw whole-brain dataset with at least -two channels: - * Background channel (i.e. autofluorescence) - * Signal channel, the one with the cells to be detected: -![raw](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/raw.png) -**Raw coronal serial two-photon mouse brain image showing labelled cells** +cellfinder takes a stitched, but otherwise raw whole-brain dataset with at least two channels: + +- Background channel (i.e. autofluorescence), +- Signal channel, the one with the cells to be detected: +![Raw coronal serial two-photon mouse brain image showing labelled cells](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/raw.png) ### Cell candidate detection -Classical image analysis (e.g. 
filters, thresholding) is used to find -cell-like objects (with false positives): -![raw](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/detect.png) -**Candidate cells (including many artefacts)** +Classical image analysis (e.g. filters, thresholding) is used to find cell-like objects (with false positives): +![Candidate cells (including many artefacts)](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/detect.png) ### Cell candidate classification -A deep-learning network (ResNet) is used to classify cell candidates as true -cells or artefacts: -![raw](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/classify.png) -**Cassified cell candidates. Yellow - cells, Blue - artefacts** +A deep-learning network (ResNet) is used to classify cell candidates as true cells (yellow) or artefacts (blue): + +![Classified cell candidates. Yellow - cells, Blue - artefacts](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/classify.png) -### Registration and segmentation (brainreg) -Using [brainreg](https://github.com/brainglobe/brainreg), -cellfinder aligns a template brain and atlas annotations (e.g. -the Allen Reference Atlas, ARA) to the sample allowing detected cells to be assigned -a brain region. +### Registration and segmentation (`brainreg`) -This transformation can be inverted, allowing detected cells to be -transformed to a standard anatomical space. +Using [`brainreg`](https://github.com/brainglobe/brainreg), `cellfinder` aligns a template brain and atlas annotations (e.g. the Allen Reference Atlas, ARA) to the sample allowing detected cells to be assigned a brain region. -![raw](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/register.png) -**ARA overlaid on sample image** +This transformation can be inverted, allowing detected cells to be transformed to a standard anatomical space.
+ +![ARA overlaid on sample image](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/register.png) ### Analysis of cell positions in a common anatomical space -Registration to a template allows for powerful group-level analysis of cellular -disributions. *(Example to come)* + +Registration to a template allows for powerful group-level analysis of cellular distributions. +*(Example to come)* ## Examples + *(more to come)* ### Tracing of inputs to retrosplenial cortex (RSP) -Input cell somas detected by cellfinder, aligned to the Allen Reference Atlas, -and visualised in [brainrender](https://github.com/brainglobe/brainrender) along + +Input cell somas detected by cellfinder, aligned to the Allen Reference Atlas, and visualised in [brainrender](https://github.com/brainglobe/brainrender) along with RSP. ![brainrender](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/brainrender.png) -Data courtesy of Sepiedeh Keshavarzi and Chryssanthi Tsitoura. [Details here](https://www.youtube.com/watch?v=pMHP0o-KsoQ) +Data courtesy of Sepiedeh Keshavarzi and Chryssanthi Tsitoura. +[Details here](https://www.youtube.com/watch?v=pMHP0o-KsoQ) ## Visualisation -cellfinder comes with a plugin ([brainglobe-napari-io](https://github.com/brainglobe/brainglobe-napari-io)) for [napari](https://github.com/napari/napari) to view your data - -#### Usage -* Open napari (however you normally do it, but typically just type `napari` into your terminal, or click on your desktop icon) - -#### Load cellfinder XML file -* Load your raw data (drag and drop the data directories into napari, one at a time) -* Drag and drop your cellfinder XML file (e.g. `cell_classification.xml`) into napari. +You can view your data using the [brainglobe-napari-io](https://github.com/brainglobe/brainglobe-napari-io) plugin for [napari](https://github.com/napari/napari). 
-#### Load cellfinder directory -* Load your raw data (drag and drop the data directories into napari, one at a time) -* Drag and drop your cellfinder output directory into napari. - -The plugin will then load your detected cells (in yellow) and the rejected cell -candidates (in blue). If you carried out registration, then these results will be -overlaid (similarly to the loading brainreg data, but transformed to the -coordinate space of your raw data). - -![load_data](https://raw.githubusercontent.com/brainglobe/brainglobe-napari-io/master/resources/load_data.gif) -**Loading raw data** - -![load_data](https://raw.githubusercontent.com/brainglobe/brainglobe-napari-io/master/resources/load_results.gif) -**Loading cellfinder results** - - -## Contributing -Contributions to cellfinder are more than welcome. Please see the [developers guide](https://brainglobe.info/developers/index.html). - - -## Citing cellfinder - -If you find cellfinder useful, and use it in your research, please cite the paper outlining the cell detection algorithm: -> Tyson, A. L., Rousseau, C. V., Niedworok, C. J., Keshavarzi, S., Tsitoura, C., Cossell, L., Strom, M. and Margrie, T. W. (2021) “A deep learning algorithm for 3D cell detection in whole mouse brain image datasets’ PLOS Computational Biology, 17(5), e1009074 -[https://doi.org/10.1371/journal.pcbi.1009074](https://doi.org/10.1371/journal.pcbi.1009074) -> -If you use any of the image registration functions in cellfinder, please also cite [brainreg](https://github.com/brainglobe/brainreg#citing-brainreg). +- Open napari (however you normally do it, but typically just type `napari` into your terminal, or click on your desktop icon). +- Load your raw data (drag and drop the data directories into napari, one at a time). ![Loading raw data](https://raw.githubusercontent.com/brainglobe/brainglobe-napari-io/master/resources/load_data.gif) +- Drag and drop your cellfinder XML file (e.g. 
`cell_classification.xml`) and/or cellfinder output directory into napari. ![Loading cellfinder results](https://raw.githubusercontent.com/brainglobe/brainglobe-napari-io/master/resources/load_results.gif) -**If you use this, or any other tools in the brainglobe suite, please - [let us know](mailto:code@adamltyson.com?subject=cellfinder), and - we'd be happy to promote your paper/talk etc.** +The plugin will then load your detected cells (in yellow) and the rejected cell candidates (in blue). +If you carried out registration, then these results will be overlaid (similarly to the loading `brainreg` data, but transformed to the coordinate space of your raw data). diff --git a/asv.conf.json b/asv.conf.json new file mode 100644 index 00000000..d620a545 --- /dev/null +++ b/asv.conf.json @@ -0,0 +1,194 @@ +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + + // The name of the project being benchmarked + "project": "brainglobe_workflows", + + // The project's homepage + "project_url": "https://github.com/brainglobe/brainglobe-workflows", + + // The URL or local path of the source code repository for the + // project being benchmarked + "repo": ".", + + // The Python project's subdirectory in your repo. If missing or + // the empty string, the project is assumed to be located at the root + // of the repository. + // "repo_subdir": "", + + // Customizable commands for building the project. + // See asv.conf.json documentation. 
+ // To build the package using pyproject.toml (PEP518), uncomment the following lines + "build_command": [ + "python -m pip install build", + "python -m build", + "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}" + ], + // To build the package using setuptools and a setup.py file, uncomment the following lines + // "build_command": [ + // "python setup.py build", + // "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}" + // ], + + // Customizable commands for installing and uninstalling the project. + // See asv.conf.json documentation. + "install_command": ["in-dir={env_dir} python -mpip install --force-reinstall {wheel_file}"], + "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], + + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "default" (for mercurial). + "branches": ["main"], // for git + // "branches": ["default"], // for mercurial + + // The DVCS being used. If not set, it will be automatically + // determined from "repo" by looking at the protocol in the URL + // (if remote), or by looking for special directories, such as + // ".git" (if local). + // "dvcs": "git", + + // The tool to use to create environments. May be "conda", + // "virtualenv", "mamba" (above 3.8) + // or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "conda", + + // timeout in seconds for installing any dependencies in environment + // defaults to 10 min + //"install_timeout": 600, + + // the base URL to show a commit for the project. + "show_commit_url": "https://github.com/brainglobe/brainglobe-workflows/commit/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. 
+ "pythons": ["3.10"], + + // The list of conda channel names to be searched for benchmark + // dependency packages in the specified order + "conda_channels": ["conda-forge", "defaults"], + + // A conda environment file that is used for environment creation. + // "conda_environment_file": "environment.yml", + + // The matrix of dependencies to test. Each key of the "req" + // requirements dictionary is the name of a package (in PyPI) and + // the values are version numbers. An empty list or empty string + // indicates to just test against the default (latest) + // version. null indicates that the package is to not be + // installed. If the package to be tested is only available from + // PyPi, and the 'environment_type' is conda, then you can preface + // the package name by 'pip+', and the package will be installed + // via pip (with all the conda available packages installed first, + // followed by the pip installed packages). + // + // The ``@env`` and ``@env_nobuild`` keys contain the matrix of + // environment variables to pass to build and benchmark commands. + // An environment will be created for every combination of the + // cartesian product of the "@env" variables in this matrix. + // Variables in "@env_nobuild" will be passed to every environment + // during the benchmark phase, but will not trigger creation of + // new environments. A value of ``null`` means that the variable + // will not be set for the current combination. + // + // "matrix": { + // "req": { + // "numpy": ["1.6", "1.7"], + // "six": ["", null], // test with and without six installed + // "pip+emcee": [""] // emcee is only available for install with pip. + // }, + // "env": {"ENV_VAR_1": ["val1", "val2"]}, + // "env_nobuild": {"ENV_VAR_2": ["val3", null]}, + // }, + + // Combinations of libraries/python versions can be excluded/included + // from the set to test. Each entry is a dictionary containing additional + // key-value pairs to include/exclude. 
+ // + // An exclude entry excludes entries where all values match. The + // values are regexps that should match the whole string. + // + // An include entry adds an environment. Only the packages listed + // are installed. The 'python' key is required. The exclude rules + // do not apply to includes. + // + // In addition to package names, the following keys are available: + // + // - python + // Python version, as in the *pythons* variable above. + // - environment_type + // Environment type, as above. + // - sys_platform + // Platform, as in sys.platform. Possible values for the common + // cases: 'linux2', 'win32', 'cygwin', 'darwin'. + // - req + // Required packages + // - env + // Environment variables + // - env_nobuild + // Non-build environment variables + // + // "exclude": [ + // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows + // {"environment_type": "conda", "req": {"six": null}}, // don't run without six on conda + // {"env": {"ENV_VAR_1": "val2"}}, // skip val2 for ENV_VAR_1 + // ], + // + // "include": [ + // // additional env for python2.7 + // {"python": "2.7", "req": {"numpy": "1.8"}, "env_nobuild": {"FOO": "123"}}, + // // additional env if run on windows+conda + // {"platform": "win32", "environment_type": "conda", "python": "2.7", "req": {"libpython": ""}}, + // ], + + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks" + // "benchmark_dir": "benchmarks", + + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env" + "env_dir": ".asv/env", + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + "results_dir": "benchmarks/results", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". 
+ "html_dir": "benchmarks/html", + + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + + // `asv` will cache results of the recent builds in each + // environment, making them faster to install next time. This is + // the number of builds to keep, per environment. + "build_cache_size": 2, + + // The commits after which the regression search in `asv publish` + // should start looking for regressions. Dictionary whose keys are + // regexps matching to benchmark names, and values corresponding to + // the commit (exclusive) after which to start looking for + // regressions. The default is to start from the first commit + // with results. If the commit is `null`, regression detection is + // skipped for the matching benchmark. + // + // "regressions_first_commits": { + // "some_benchmark": "352cdf", // Consider regressions only after this commit + // "another_benchmark": null, // Skip regression detection altogether + // }, + + // The thresholds for relative change in results, after which `asv + // publish` starts reporting regressions. Dictionary of the same + // form as in ``regressions_first_commits``, with values + // indicating the thresholds. If multiple entries match, the + // maximum is taken. If no entry matches, the default is 5%. 
+ // + // "regressions_thresholds": { + // "some_benchmark": 0.01, // Threshold of 1% + // "another_benchmark": 0.5, // Threshold of 50% + // }, +} diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/cellfinder.py b/benchmarks/cellfinder.py new file mode 100644 index 00000000..76d364bc --- /dev/null +++ b/benchmarks/cellfinder.py @@ -0,0 +1,227 @@ +import json +import shutil +from pathlib import Path + +import pooch +from brainglobe_utils.IO.cells import save_cells +from cellfinder_core.main import main as cellfinder_run +from cellfinder_core.tools.IO import read_with_dask + +from brainglobe_workflows.cellfinder.cellfinder_main import ( + DEFAULT_JSON_CONFIG_PATH, + CellfinderConfig, + run_workflow_from_cellfinder_run, +) +from brainglobe_workflows.cellfinder.cellfinder_main import ( + setup as setup_cellfinder_workflow, +) + + +class TimeBenchmarkPrepGIN: + """ + + A base class for timing benchmarks for the cellfinder workflow. + + It includes: + - a setup_cache function that downloads the GIN data specified in the + default_config.json to a local directory (created by asv). This function + runs only once before all repeats of the benchmark. + - a setup function, that runs the setup steps for the workflow. + - a teardown function, that removes the output directory. + + Notes + ----- + The class includes some predefined attributes for timing benchmarks. For + the full list see + https://asv.readthedocs.io/en/stable/benchmarks.html#benchmark-attributes + + Some asv benchmarking nomenclature: + - repeat: a benchmark repeat is made up of the following steps: + 1- the `setup` is run, + 2- then the timed benchmark routine is called for `n` iterations, and + 3- finally that teardown function is run. + Each repeat generates a sample, which is the average time that the + routine took across all iterations. A new process is started for each + repeat of each benchmark. 
A calibration phase before running the repeat + computes the number of iterations that will be executed. Each benchmark + is run for a number of repeats. The setup_cache function is run only once + for all repeats of a benchmark (but it is discarded before the next + benchmark). By default `repeat` is set to 0, which means: + - if rounds==1 the default is + (min_repeat, max_repeat, max_time) = (1, 10, 20.0), + - if rounds != 1 the default is + (min_repeat, max_repeat, max_time) = (1, 5, 10.0) + + - iterations (`number`): the number of iterations in each sample. Note that + `setup` and `teardown` are not run between iterations. asv will + automatically select the number of iterations so that each sample takes + approximately `sample_time` seconds. + + - round: at each round, each benchmark is run for the specified number of + repeats. The idea is that we sample each benchmark over longer periods of + background performance variations. + + - warmup time: asv will spend this time (in seconds) in calling the + benchmarked function repeatedly, before starting to run the actual + benchmark. If not specified, warmup_time defaults to 0.1 seconds + + """ + + # Timing attributes + timeout = 600 # default: 60 s + version = ( + None # benchmark version. Default:None (i.e. hash of source code) + ) + warmup_time = 0.1 # seconds + rounds = 2 + repeat = 0 + sample_time = 0.01 # default: 10 ms = 0.01 s; + min_run_count = 2 # default:2 + + # Custom attributes + input_config_path = str(DEFAULT_JSON_CONFIG_PATH) + + def setup_cache( + self, + ): + """ + Download the input data from the GIN repository to the local + directory specified in the default_config.json + + Notes + ----- + The `setup_cache` method only performs the computations once + per benchmark round and then caches the result to disk [1]_. It cannot + be parametrised [2]_. 
+ + + [1] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#setup-and-teardown-functions + [2] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#parameterized-benchmarks + """ + + # Check config file exists + assert Path(self.input_config_path).exists() + + # Instantiate a CellfinderConfig from the input json file + # (assumes config is json serializable) + with open(self.input_config_path) as cfg: + config_dict = json.load(cfg) + config = CellfinderConfig(**config_dict) + + # Download data with pooch + _ = pooch.retrieve( + url=config.data_url, + known_hash=config.data_hash, + path=config.install_path, + progressbar=True, + processor=pooch.Unzip(extract_dir=config.extract_dir_relative), + ) + + # Check paths to input data should now exist in config + assert Path(config.signal_dir_path).exists() + assert Path(config.background_dir_path).exists() + + def setup(self): + """ + Run the cellfinder workflow setup steps. + + The command line input arguments are injected as dependencies. + """ + + # Run setup + cfg = setup_cellfinder_workflow( + [ + "--config", + self.input_config_path, + ] + ) + + # Save configuration as attribute + self.cfg = cfg + + def teardown(self): + """ + Remove the cellfinder output directory. + + The input data is kept for all repeats of the same benchmark, + to avoid repeated downloads from GIN. + """ + shutil.rmtree(Path(self.cfg.output_path).resolve()) + + +class TimeFullWorkflow(TimeBenchmarkPrepGIN): + """ + Time the full cellfinder workflow. + + It includes reading the signal and background arrays with dask, + detecting the cells and saving the results to an XML file + + Parameters + ---------- + TimeBenchmarkPrepGIN : _type_ + A base class for timing benchmarks for the cellfinder workflow. 
+ """ + + def time_workflow_from_cellfinder_run(self): + run_workflow_from_cellfinder_run(self.cfg) + + +class TimeReadInputDask(TimeBenchmarkPrepGIN): + """ + Time the reading input data operations with dask + + Parameters + ---------- + TimeBenchmarkPrepGIN : _type_ + A base class for timing benchmarks for the cellfinder workflow. + """ + + def time_read_signal_with_dask(self): + read_with_dask(self.cfg.signal_dir_path) + + def time_read_background_with_dask(self): + read_with_dask(self.cfg.background_dir_path) + + +class TimeDetectCells(TimeBenchmarkPrepGIN): + """ + Time the cell detection main pipeline (`cellfinder_run`) + + Parameters + ---------- + TimeBenchmarkPrepGIN : _type_ + A base class for timing benchmarks for the cellfinder workflow. + """ + + # extend basic setup function + def setup(self): + # basic setup + TimeBenchmarkPrepGIN.setup(self) + + # add input data as arrays to config + self.signal_array = read_with_dask(self.cfg.signal_dir_path) + self.background_array = read_with_dask(self.cfg.background_dir_path) + + def time_cellfinder_run(self): + cellfinder_run( + self.signal_array, self.background_array, self.cfg.voxel_sizes + ) + + +class TimeSaveCells(TimeBenchmarkPrepGIN): + # extend basic setup function + def setup(self): + # basic setup + TimeBenchmarkPrepGIN.setup(self) + + # add input data as arrays to config + self.signal_array = read_with_dask(self.cfg.signal_dir_path) + self.background_array = read_with_dask(self.cfg.background_dir_path) + + # detect cells + self.detected_cells = cellfinder_run( + self.signal_array, self.background_array, self.cfg.voxel_sizes + ) + + def time_save_cells(self): + save_cells(self.detected_cells, self.cfg.detected_cells_path) diff --git a/brainglobe_workflows/__init__.py b/brainglobe_workflows/__init__.py index 9f726d43..5fd437c6 100644 --- a/brainglobe_workflows/__init__.py +++ b/brainglobe_workflows/__init__.py @@ -1,7 +1,11 @@ -from importlib.metadata import metadata +from importlib.metadata import 
PackageNotFoundError, metadata -__version__ = metadata("brainglobe_workflows")["version"] -__author__ = metadata("brainglobe_workflows")["author-email"] -__license__ = metadata("brainglobe_workflows")["license"] +try: + __version__ = metadata("brainglobe_workflows")["version"] + __author__ = metadata("brainglobe_workflows")["author-email"] + __license__ = metadata("brainglobe_workflows")["license"] +except PackageNotFoundError: + # Package not installed + pass del metadata diff --git a/brainglobe_workflows/cellfinder/__init__.py b/brainglobe_workflows/cellfinder/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/brainglobe_workflows/cellfinder/cellfinder_main.py b/brainglobe_workflows/cellfinder/cellfinder_main.py new file mode 100644 index 00000000..fd19db34 --- /dev/null +++ b/brainglobe_workflows/cellfinder/cellfinder_main.py @@ -0,0 +1,404 @@ +"""This script reproduces the most common cellfinder workflow + +It receives as an (optional) command line input the path to a configuration +json file, that holds the values of the required parameters for the workflow. + +If no input json file is passed as a configuration, the default +configuration defined at brainglobe_workflows/cellfinder/default_config.json +is used. 
+ +Example usage: + - to pass a custom configuration, run (from the cellfinder_main.py + parent directory): + python cellfinder_main.py --config path/to/input/config.json + - to use the default configuration, run + python cellfinder_main.py + + +""" + +import argparse +import datetime +import json +import logging +import os +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Optional, Tuple, Union + +import pooch +from brainglobe_utils.IO.cells import save_cells +from cellfinder_core.main import main as cellfinder_run +from cellfinder_core.tools.IO import read_with_dask +from cellfinder_core.train.train_yml import depth_type + +Pathlike = Union[str, os.PathLike] + +DEFAULT_JSON_CONFIG_PATH = ( + Path(__file__).resolve().parent / "default_config.json" +) + + +@dataclass +class CellfinderConfig: + """ + Define input and output data locations, and the parameters for + the cellfinder preprocessing steps. + """ + + # cellfinder workflows cache directory + install_path: Pathlike + + # cached subdirectory to save data to + extract_dir_relative: Pathlike + signal_subdir: str + background_subdir: str + output_path_basename_relative: Pathlike + detected_cells_filename: Pathlike + + # preprocessing parameters + voxel_sizes: Tuple[float, float, float] + start_plane: int + end_plane: int + trained_model: Optional[ + os.PathLike + ] # if None, it will use a default model + model_weights: Optional[os.PathLike] + model: str + batch_size: int + n_free_cpus: int + network_voxel_sizes: Tuple[int, int, int] + soma_diameter: int + ball_xy_size: int + ball_z_size: int + ball_overlap_fraction: float + log_sigma_size: float + n_sds_above_mean_thresh: int + soma_spread_factor: float + max_cluster_size: int + cube_width: int + cube_height: int + cube_depth: int + network_depth: depth_type + + # origin of data to download (if required) + data_url: Optional[str] = None + data_hash: Optional[str] = None + + # The following attributes are added + # 
during the setup phase of the workflow + list_signal_files: Optional[list] = None + list_background_files: Optional[list] = None + output_path: Pathlike = "" + signal_dir_path: Pathlike = "" + background_dir_path: Pathlike = "" + detected_cells_path: Pathlike = "" + + +def setup(argv=None) -> CellfinderConfig: + def parse_cli_arguments(argv_) -> argparse.Namespace: + """Define argument parser for cellfinder + workflow script. + + It expects a path to a json file with the + parameters required to run the workflow. + If none is provided, the default config is used. + + Returns + ------- + args : argparse.Namespace + command line input arguments parsed + """ + # initialise argument parser + parser = argparse.ArgumentParser( + description=( + "To launch the workflow with " + "a specific set of input parameters, run: " + "`python cellfinder_main.py --config path/to/config.json` " + "where path/to/config.json is the json file " + "containing the workflow parameters." + ) + ) + # add arguments + parser.add_argument( + "-c", + "--config", + default=str(DEFAULT_JSON_CONFIG_PATH), + type=str, + metavar="CONFIG", # a name for usage messages + help="path to the json file with the workflow parameters", + ) + + # build parser object + args = parser.parse_args(argv_) + + # print error if required arguments not provided + if not args.config: + logger.error("Paths to input config not provided.") + parser.print_help() + + return args + + def setup_logger() -> logging.Logger: + """Setup a logger for this script + + The logger's level is set to DEBUG, and it + is linked to a handler that writes to the + console (stdout). + + Returns + ------- + logging.Logger + a logger object + """ + # define handler that writes to stdout + console_handler = logging.StreamHandler(sys.stdout) + console_format = logging.Formatter( + "%(name)s %(levelname)s: %(message)s" + ) + console_handler.setFormatter(console_format) + + # define logger and link to handler + logger = logging.getLogger( + __name__ + ) # if imported as a module, the logger is named 
after the module + logger.setLevel(logging.DEBUG) + logger.addHandler(console_handler) + return logger + + def setup_workflow(input_config_path: Path) -> CellfinderConfig: + """Run setup steps prior to executing the workflow + + These setup steps include: + - instantiating a CellfinderConfig object with the required parameters, + - checking if the input data exists locally, and fetching from + GIN repository otherwise, + - adding the path to the input data files to the config, and + - creating a timestamped directory for the output of the workflow if + it doesn't exist and adding its path to the config + + Parameters + ---------- + input_config_path : Path + path to the input config file + + Returns + ------- + config : CellfinderConfig + a dataclass whose attributes are the parameters + for running cellfinder. + """ + + # Check config file exists + assert input_config_path.exists() + + # Instantiate a CellfinderConfig from the input json file + # (assumes config is json serializable) + with open(input_config_path) as cfg: + config_dict = json.load(cfg) + config = CellfinderConfig(**config_dict) + + # Print info logs for status + logger.info(f"Input config read from {input_config_path}") + if input_config_path == DEFAULT_JSON_CONFIG_PATH: + logger.info("Using default config file") + + # Retrieve and add lists of input data to the config, + # if these are not defined yet + if not (config.list_signal_files and config.list_background_files): + # build fullpaths to inputs + config.signal_dir_path = str( + Path(config.install_path) + / config.extract_dir_relative + / config.signal_subdir + ) + config.background_dir_path = str( + Path(config.install_path) + / config.extract_dir_relative + / config.background_subdir + ) + # retrieve data + config = retrieve_input_data(config) + + # Create timestamped output directory if it doesn't exist + timestamp = datetime.datetime.now() + timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S") + output_path_timestamped = 
Path(config.install_path) / ( + str(config.output_path_basename_relative) + timestamp_formatted + ) + output_path_timestamped.mkdir(parents=True, exist_ok=True) + + # Add output path and output file path to config + config.output_path = output_path_timestamped + config.detected_cells_path = ( + config.output_path / config.detected_cells_filename + ) + + return config + + def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig: + """ + Adds the lists of input data files (signal and background) + to the config. + + It first checks if the input data exists locally. + - If both directories (signal and background) exist, the lists of + signal and background files are added to the config. + - If exactly one of the input data directories is missing, an error + message is logged. + - If neither of them exist, the data is retrieved from the provided GIN + repository. If no URL or hash to GIN is provided, an error is shown. + + Parameters + ---------- + config : CellfinderConfig + a dataclass whose attributes are the parameters + for running cellfinder. + + Returns + ------- + config : CellfinderConfig + a dataclass whose attributes are the parameters + for running cellfinder. + """ + # Check if input data (signal and background) exist locally. 
+ # If both directories exist, get list of signal and background files + if ( + Path(config.signal_dir_path).exists() + and Path(config.background_dir_path).exists() + ): + logger.info("Fetching input data from the local directories") + + config.list_signal_files = [ + f + for f in Path(config.signal_dir_path).resolve().iterdir() + if f.is_file() + ] + config.list_background_files = [ + f + for f in Path(config.background_dir_path).resolve().iterdir() + if f.is_file() + ] + + # If exactly one of the input data directories is missing, print error + elif ( + Path(config.signal_dir_path).resolve().exists() + or Path(config.background_dir_path).resolve().exists() + ): + if not Path(config.signal_dir_path).resolve().exists(): + logger.error( + f"The directory {config.signal_dir_path} does not exist" + ) + else: + logger.error( + f"The directory {config.background_dir_path} " + "does not exist" + ) + + # If neither of them exist, retrieve data from GIN repository + else: + # check if GIN URL and hash are defined (log error otherwise) + if (not config.data_url) or (not config.data_hash): + logger.error( + "Input data not found locally, and URL/hash to " + "GIN repository not provided" + ) + + else: + # get list of files in GIN archive with pooch.retrieve + list_files_archive = pooch.retrieve( + url=config.data_url, + known_hash=config.data_hash, + path=config.install_path, # zip will be downloaded here + progressbar=True, + processor=pooch.Unzip( + extract_dir=config.extract_dir_relative + # path to unzipped dir, + # *relative* to the path set in 'path' + ), + ) + logger.info( + "Fetching input data from the provided GIN repository" + ) + + # Check signal and background parent directories exist now + assert Path(config.signal_dir_path).resolve().exists() + assert Path(config.background_dir_path).resolve().exists() + + # Add signal files to config + config.list_signal_files = [ + f + for f in list_files_archive + if f.startswith( + 
str(Path(config.signal_dir_path).resolve()) + ) # if str(config.signal_dir_path) in f + ] + + # Add background files to config + config.list_background_files = [ + f + for f in list_files_archive + if f.startswith( + str(Path(config.background_dir_path).resolve()) + ) # if str(config.background_dir_path) in f + ] + + return config + + # setup logger (before parsing: parse_cli_arguments logs errors) + logger = setup_logger() + + # parse command line input arguments: + # sys.argv in most cases except for testing + # see https://paiml.com/docs/home/books/testing-in-python/chapter08-monkeypatching/#the-simplest-monkeypatching + argv = argv or sys.argv[1:] + args = parse_cli_arguments(argv) + + # run setup steps and return config + cfg = setup_workflow(Path(args.config)) + + return cfg + + +def run_workflow_from_cellfinder_run(cfg: CellfinderConfig): + """ + Run workflow based on the cellfinder_core.main.main() + function. + + The steps are: + 1. Read the input signal and background data as two separate + Dask arrays. + 2. Run the main cellfinder pipeline on the input Dask arrays, + with the parameters defined in the input configuration (cfg). + 3. Save the detected cells as an xml file to the location specified in + the input configuration (cfg). 
+ + Parameters + ---------- + cfg : CellfinderConfig + a class with the required setup methods and parameters for + the cellfinder workflow + """ + # Read input data as Dask arrays + signal_array = read_with_dask(cfg.signal_dir_path) + background_array = read_with_dask(cfg.background_dir_path) + + # Run main analysis using `cellfinder_run` + detected_cells = cellfinder_run( + signal_array, background_array, cfg.voxel_sizes + ) + + # Save results to xml file + save_cells( + detected_cells, + cfg.detected_cells_path, + ) + + +if __name__ == "__main__": + # run setup + cfg = setup() + + # run workflow + run_workflow_from_cellfinder_run(cfg) # only this will be benchmarked diff --git a/brainglobe_workflows/cellfinder/default_config.json b/brainglobe_workflows/cellfinder/default_config.json new file mode 100644 index 00000000..a80a4ba4 --- /dev/null +++ b/brainglobe_workflows/cellfinder/default_config.json @@ -0,0 +1,39 @@ +{ + "install_path": ".cellfinder_workflows", + "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", + "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", + "extract_dir_relative": "cellfinder_test_data", + "signal_subdir": "signal", + "background_subdir": "background", + "output_path_basename_relative": "cellfinder_output_", + "detected_cells_filename": "detected_cells.xml", + "voxel_sizes": [ + 5, + 2, + 2 + ], + "start_plane": 0, + "end_plane": -1, + "trained_model": null, + "model_weights": null, + "model": "resnet50_tv", + "batch_size": 32, + "n_free_cpus": 2, + "network_voxel_sizes": [ + 5, + 1, + 1 + ], + "soma_diameter": 16, + "ball_xy_size": 6, + "ball_z_size": 15, + "ball_overlap_fraction": 0.6, + "log_sigma_size": 0.2, + "n_sds_above_mean_thresh": 10, + "soma_spread_factor": 1.4, + "max_cluster_size": 100000, + "cube_width": 50, + "cube_height": 50, + "cube_depth": 20, + "network_depth": "50" +} diff --git a/pyproject.toml b/pyproject.toml index 
e387c824..18bc443b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,29 +1,29 @@ [project] name = "brainglobe-workflows" -description = "Automated 3D cell detection and registration of whole-brain images" -readme = "README.md" -license = { file = "LICENSE" } -requires-python = ">=3.9" authors = [ { name = "Adam Tyson", email = "code@adamltyson.com" }, + { name = "BrainGlobe developers", email = "code@adamltyson.com" }, { name = "Christian Niedworok" }, { name = "Charly Rousseau" }, ] +description = "A collection of end-to-end data analysis workflows executed using BrainGlobe tools." +readme = "README.md" +license = { file = "LICENSE" } +requires-python = ">=3.9" classifiers = [ "Development Status :: 3 - Alpha", - "Operating System :: POSIX :: Linux", - "Operating System :: Microsoft :: Windows :: Windows 10", - "Programming Language :: Python", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Topic :: Scientific/Engineering :: Image Recognition", "Intended Audience :: Developers", "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python", + "Topic :: Scientific/Engineering :: Image Recognition", ] dependencies = [ "brainreg>=1.0.0", - "cellfinder-core>=0.2.4", + "cellfinder-core>=0.2.4,<1.0.0", "configobj", "fancylog>=0.0.7", "imio", @@ -43,6 +43,9 @@ dynamic = ["version"] cellfinder = "brainglobe_workflows.main:main" [project.optional-dependencies] +benchmark = [ + "pooch", +] # Depending on how asv builds wheels, we may need to play with asv.config.json to force it to include this optional dependency. 
dev = [ "black", "pytest-cov", @@ -52,24 +55,34 @@ dev = [ "pre-commit", "setuptools_scm", ] -napari = ["napari[pyside2]", "brainglobe-napari-io", "cellfinder-napari"] +napari = ["napari[pyside2]", "brainglobe-napari-io", "cellfinder-napari<1.0.0"] [project.urls] -source = "https://github.com/brainglobe/brainglobe-workflows" -bug_tracker = "https://github.com/brainglobe/brainglobe-workflows/issues" -homepage = "https://brainglobe.info" -documentation = "https://brainglobe.info/documentation/brainglobe-workflows" +"Bug Tracker" = "https://github.com/brainglobe/brainglobe-workflows/issues" +"Documentation" = "https://brainglobe.info/documentation/brainglobe-workflows" +"Homepage" = "https://brainglobe.info" +"Source Code" = "https://github.com/brainglobe/brainglobe-workflows" + [build-system] requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.2"] build-backend = "setuptools.build_meta" [tool.black] -target-version = ['py38', 'py39', 'py310'] +target-version = ["py39", "py310"] skip-string-normalization = false line-length = 79 +ignore = ["*.yaml"] + +[tool.cibuildwheel] +build = "cp39-* cp310-*" + +[tool.cibuildwheel.macos] +archs = ["x86_64", "arm64"] [tool.check-manifest] -ignore = ["*.yaml"] + +[tool.pytest.ini_options] +addopts = "--cov=brainglobe_workflows" [tool.ruff] line-length = 79 @@ -83,6 +96,25 @@ zip-safe = false [tool.setuptools.packages.find] include = ["brainglobe_workflows"] -exclude = ["tests", "resources"] +exclude = ["benchmarks", "tests", "resources"] [tool.setuptools_scm] + +[tool.tox] +legacy_tox_ini = """ +[tox] +envlist = py{38,39,310} +isolated_build = True + +[gh-actions] +python = + 3.8: py38 + 3.9: py39 + 3.10: py310 + +[testenv] +extras = + dev +commands = + pytest -v --color=yes --cov=brainglobe_workflows --cov-report=xml +""" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_integration/__init__.py b/tests/test_integration/__init__.py new file mode 
100644 index 00000000..e69de29b diff --git a/tests/test_integration/conftest.py b/tests/test_integration/conftest.py new file mode 100644 index 00000000..d9207917 --- /dev/null +++ b/tests/test_integration/conftest.py @@ -0,0 +1,290 @@ +import json +from pathlib import Path +from typing import Any + +import pooch +import pytest + +from brainglobe_workflows.cellfinder.cellfinder_main import CellfinderConfig + + +def make_config_dict_fetch_from_local(cellfinder_cache_dir: Path) -> dict: + """Generate a config dictionary with the required parameters + for the workflow + + The input data is assumed to be locally at cellfinder_cache_dir. + The results are saved in a timestamped output subdirectory under + cellfinder_cache_dir + + Parameters + ---------- + cellfinder_cache_dir : Path + Path to the directory where the downloaded input data will be unzipped, + and the output will be saved + + Returns + ------- + dict + dictionary with the required parameters for the workflow + """ + return { + "install_path": cellfinder_cache_dir, + "extract_dir_relative": "cellfinder_test_data", # relative path + "signal_subdir": "signal", + "background_subdir": "background", + "output_path_basename_relative": "cellfinder_output_", + "detected_cells_filename": "detected_cells.xml", + "voxel_sizes": [5, 2, 2], # microns + "start_plane": 0, + "end_plane": -1, + "trained_model": None, # if None, it will use a default model + "model_weights": None, + "model": "resnet50_tv", + "batch_size": 32, + "n_free_cpus": 2, + "network_voxel_sizes": [5, 1, 1], + "soma_diameter": 16, + "ball_xy_size": 6, + "ball_z_size": 15, + "ball_overlap_fraction": 0.6, + "log_sigma_size": 0.2, + "n_sds_above_mean_thresh": 10, + "soma_spread_factor": 1.4, + "max_cluster_size": 100000, + "cube_width": 50, + "cube_height": 50, + "cube_depth": 20, + "network_depth": "50", + } + + +def make_config_dict_fetch_from_GIN( + cellfinder_cache_dir: Path, + data_url: str, + data_hash: str, +) -> dict: + """Generate a config 
dictionary with the required parameters + for the workflow + + The input data is fetched from GIN and downloaded to cellfinder_cache_dir. + The results are also saved in a timestamped output subdirectory under + cellfinder_cache_dir + + Parameters + ---------- + cellfinder_cache_dir : Path + Path to the directory where the downloaded input data will be unzipped, + and the output will be saved + data_url: str + URL to the GIN repository with the data to download + data_hash: str + Hash of the data to download + + Returns + ------- + dict + dictionary with the required parameters for the workflow + """ + + config = make_config_dict_fetch_from_local(cellfinder_cache_dir) + config["data_url"] = data_url + config["data_hash"] = data_hash + + return config + + +def prep_json(obj: Any) -> Any: + """ + Returns a JSON encodable version of the input object. + + It uses the JSON default encoder for all objects + except those of type `Path`. + + + Parameters + ---------- + obj : Any + _description_ + + Returns + ------- + Any + JSON serializable version of input object + """ + if isinstance(obj, Path): + return str(obj) + else: + json_decoder = json.JSONEncoder() + return json_decoder.default(obj) + + +@pytest.fixture(autouse=True) +def cellfinder_cache_dir(tmp_path: Path) -> Path: + """Create a .cellfinder_workflows directory + under a temporary pytest directory and return + its path. + + The temporary directory is available via pytest's tmp_path + fixture. 
A new temporary directory is created every function call + (i.e., scope="function") + + Parameters + ---------- + tmp_path : Path + path to pytest-generated temporary directory + + Returns + ------- + Path + path to the created cellfinder_workflows cache directory + """ + + return Path(tmp_path) / ".cellfinder_workflows" + + +@pytest.fixture(scope="session") +def data_url() -> str: + """Return the URL to the GIN repository with the input data + + Returns + ------- + str + URL to the GIN repository with the input data + """ + return "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip" + + +@pytest.fixture(scope="session") +def data_hash() -> str: + """Return the hash of the GIN input data + + Returns + ------- + str + Hash to the GIN input data + """ + return "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914" + + +@pytest.fixture(scope="session") +def default_json_config_path() -> Path: + """Return the path to the json file + with the default config parameters + + Returns + ------- + Path + path to the json file with the default config parameters + """ + from brainglobe_workflows.cellfinder.cellfinder_main import ( + DEFAULT_JSON_CONFIG_PATH, + ) + + return DEFAULT_JSON_CONFIG_PATH + + +@pytest.fixture() +def path_to_config_fetch_GIN( + tmp_path: Path, cellfinder_cache_dir: Path, data_url: str, data_hash: str +) -> Path: + """Create an input config that fetches data from GIN and + return its path + + Parameters + ---------- + tmp_path : Path + path to a fresh pytest-generated temporary directory. The + generated config is saved here. + + cellfinder_cache_dir : Path + path to the cellfinder cache directory, where the paths + in the config should point to. 
+ + data_url: str + URL to the GIN repository with the input data + + data_hash: str + hash to the GIN input data + + Returns + ------- + input_config_path : Path + path to config file that fetches data from GIN + """ + # create config dict + config_dict = make_config_dict_fetch_from_GIN( + cellfinder_cache_dir, data_url, data_hash + ) + + # create a temp json file to dump config data + input_config_path = ( + tmp_path / "input_config.json" + ) # save it in a temp dir separate from cellfinder_cache_dir + + # save config data to json file + with open(input_config_path, "w") as js: + json.dump(config_dict, js, default=prep_json) + + # check json file exists + assert Path(input_config_path).is_file() + + return input_config_path + + +@pytest.fixture() +def path_to_config_fetch_local( + tmp_path: Path, cellfinder_cache_dir: Path, data_url: str, data_hash: str +) -> Path: + """Create an input config that points to local data and + return its path. + + The local data is downloaded from GIN, but no reference + to the GIN repository is included in the config. + + Parameters + ---------- + tmp_path : Path + path to a fresh pytest-generated temporary directory. The + generated config is saved here. + + cellfinder_cache_dir : Path + path to the cellfinder cache directory, where the paths + in the config should point to. 
+ + data_url: str + URL to the GIN repository with the input data + + data_hash: str + hash to the GIN input data + + Returns + ------- + input_config_path : Path + path to a config file that points to local data + """ + + # instantiate basic config (assumes data is local) + config_dict = make_config_dict_fetch_from_local(cellfinder_cache_dir) + config = CellfinderConfig(**config_dict) + + # download GIN data to specified local directory + pooch.retrieve( + url=data_url, + known_hash=data_hash, + path=config.install_path, # path to download zip to + progressbar=True, + processor=pooch.Unzip( + extract_dir=config.extract_dir_relative + # path to unzipped dir, *relative* to 'path' + ), + ) + + # save config to json + input_config_path = tmp_path / "input_config.json" + with open(input_config_path, "w") as js: + json.dump(config_dict, js, default=prep_json) + + # check json file exists + assert Path(input_config_path).is_file() + + return input_config_path diff --git a/tests/test_integration/test_cellfinder_workflow.py b/tests/test_integration/test_cellfinder_workflow.py new file mode 100644 index 00000000..e55d0a46 --- /dev/null +++ b/tests/test_integration/test_cellfinder_workflow.py @@ -0,0 +1,211 @@ +import json +import subprocess +import sys +from pathlib import Path + +from brainglobe_workflows.cellfinder.cellfinder_main import CellfinderConfig + + +def test_run_with_default_config(tmp_path, default_json_config_path): + """Test workflow run with no command line arguments + + If no command line arguments are provided, the default + config at brainglobe_workflows/cellfinder/default_config.json + should be used. + + After the workflow is run we check that: + - there are no errors (via returncode), + - the logs reflect the default config file was used, and + - a single output directory exists with the expected + output file inside it + + Parameters + ---------- + tmp_path : Path + path to a pytest-generated temporary directory. 
+ """ + + # run workflow with no CLI arguments, + # with cwd=tmp_path + subprocess_output = subprocess.run( + [ + sys.executable, + Path(__file__).resolve().parents[2] + / "brainglobe_workflows" + / "cellfinder" + / "cellfinder_main.py", + ], + cwd=tmp_path, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + encoding="utf-8", + ) + + # check returncode + assert subprocess_output.returncode == 0 + + # check logs + assert "Using default config file" in subprocess_output.stdout + + # Check one output directory exists and has expected + # output file inside it + assert_outputs(default_json_config_path, tmp_path) + + +def test_run_with_GIN_data( + path_to_config_fetch_GIN, +): + """Test workflow runs when passing a config that fetches data + from the GIN repository + + After the workflow is run we check that: + - there are no errors (via returncode), + - the logs reflect the input config file was used, + - the logs reflect the data was downloaded from GIN, and + - a single output directory exists with the expected + output file inside it + + Parameters + ---------- + path_to_config_fetch_GIN : Path + path to the input config file that fetches data from GIN. 
+ """ + # run workflow with CLI and capture log + subprocess_output = subprocess.run( + [ + sys.executable, + Path(__file__).resolve().parents[2] + / "brainglobe_workflows" + / "cellfinder" + / "cellfinder_main.py", + "--config", + str(path_to_config_fetch_GIN), + ], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + encoding="utf-8", + ) + + # check returncode + assert subprocess_output.returncode == 0 + + # check logs + assert ( + f"Input config read from {str(path_to_config_fetch_GIN)}" + in subprocess_output.stdout + ) + assert ( + "Fetching input data from the provided GIN repository" + in subprocess_output.stdout + ) + + # check one output directory exists and + # has expected output file inside it + assert_outputs(path_to_config_fetch_GIN) + + +def test_run_with_local_data( + path_to_config_fetch_local, +): + """Test workflow runs when passing a config that uses + local data + + After the workflow is run we check that: + - there are no errors (via returncode), + - the logs reflect the input config file was used, + - the logs reflect the data was found locally, and + - a single output directory exists with the expected + output file inside it + + Parameters + ---------- + path_to_config_fetch_local : Path + path to the input config file that points to local data. 
+ """ + + # run workflow with CLI + subprocess_output = subprocess.run( + [ + sys.executable, + Path(__file__).resolve().parents[2] + / "brainglobe_workflows" + / "cellfinder" + / "cellfinder_main.py", + "--config", + str(path_to_config_fetch_local), + ], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + encoding="utf-8", + ) + + # check returncode + assert subprocess_output.returncode == 0 + + # check logs + assert ( + f"Input config read from {str(path_to_config_fetch_local)}" + in subprocess_output.stdout + ) + assert ( + "Fetching input data from the local directories" + in subprocess_output.stdout + ) + + # check one output directory exists and + # has expected output file inside it + assert_outputs(path_to_config_fetch_local) + + +def assert_outputs(path_to_config, parent_dir_of_install_path=""): + """Helper function to determine whether the output is + as expected. + + It checks that: + - a single output directory exists, and + - the expected output file exists inside it + + Note that config.output_path is only defined after the workflow + setup is run, because its name is timestamped. Therefore, + we search for an output directory based on config.output_path_basename. + + Parameters + ---------- + path_to_config : Path + path to the input config used to generate the + output. + + parent_dir_of_install_path : str, optional + If the install_path in the input config is relative to the + directory the script is launched from (as is the case in the + default_config.json file), the absolute path to its parent_dir + must be specified here. If the paths to install_path is + absolute, this input is not required. By default "". 
+ """ + + # load input config + with open(path_to_config) as config: + config_dict = json.load(config) + config = CellfinderConfig(**config_dict) + + # check one output directory exists and + # it has expected output file inside it + output_path_without_timestamp = ( + Path(parent_dir_of_install_path) + / config.install_path + / config.output_path_basename_relative + ) + output_path_timestamped = [ + x + for x in output_path_without_timestamp.parent.glob("*") + if x.is_dir() and x.name.startswith(output_path_without_timestamp.name) + ] + + assert len(output_path_timestamped) == 1 + assert (output_path_timestamped[0]).exists() + assert ( + output_path_timestamped[0] / config.detected_cells_filename + ).is_file() diff --git a/tests/test_unit/__init__.py b/tests/test_unit/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_unit/test_placeholder.py b/tests/test_unit/test_placeholder.py new file mode 100644 index 00000000..3ada1ee4 --- /dev/null +++ b/tests/test_unit/test_placeholder.py @@ -0,0 +1,2 @@ +def test_placeholder(): + assert True