diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml
index 5a194ebf..4de3a200 100644
--- a/.github/workflows/test_and_deploy.yml
+++ b/.github/workflows/test_and_deploy.yml
@@ -32,20 +32,22 @@ jobs:
       - uses: neuroinformatics-unit/actions/check_manifest@v2
 
   test:
-    needs: [linting,manifest]
+    needs: [linting, manifest]
+    name: ${{ matrix.os }} py${{ matrix.python-version }}
     runs-on: ${{ matrix.os }}
     env:
       INPUT_COREDEV: ${{ github.event.inputs.coredev }}
     strategy:
       fail-fast: false
       matrix:
-        # Run across a mixture of Python versions and operating systems
+        # Run tests on ubuntu across all supported versions
+        python-version: ["3.9", "3.10"]
+        os: [ubuntu-latest]
+        # Include at least one macOS and Windows test
         include:
         - os: macos-latest
           python-version: "3.10"
         - os: windows-latest
-          python-version: "3.9"
-        - os: ubuntu-latest
           python-version: "3.10"
     steps:
       - uses: neuroinformatics-unit/actions/test@v2
@@ -60,7 +62,8 @@ jobs:
     steps:
       - uses: neuroinformatics-unit/actions/build_sdist_wheels@v2
 
-  deploy:
+  upload_all:
+    name: Publish build distributions
     needs: [build_sdist_wheels]
     runs-on: ubuntu-latest
     if: github.event_name == 'push' && github.ref_type == 'tag'
@@ -75,7 +78,7 @@ jobs:
           password: ${{ secrets.TWINE_API_KEY }}
 
   build-and-push-docker:
-    needs: deploy
+    needs: [upload_all]
     runs-on: ubuntu-latest
     permissions:
       contents: read
@@ -83,7 +86,7 @@ jobs:
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
 
       - name: Log in to the Container registry
         uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
diff --git a/.gitignore b/.gitignore
index 8c1f8334..56a96743 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,10 +77,17 @@ doc/build/
 _build/
 mkdocs.yml
 
+# MkDocs documentation
+site/
+
 # PyBuilder
 target/
 
-# Jupyter Notebook
+# Pycharm and VSCode
+.idea/
+.vscode/
+
+# IPython Notebook
 .ipynb_checkpoints
 
 # pyenv
@@ -109,18 +116,16 @@ venv.bak/
 # Rope project settings
 .ropeproject
 
-# mkdocs documentation
-/site
-
 # mypy
 .mypy_cache/
 
-.idea/
-
 *.~lock.*
 
 pip-wheel-metadata/
 
-*.DS_Store
+# OS
+.DS_Store
 
-*.vscode/
+# written by setuptools_scm
+**/_version.py
+benchmarks/results/*
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5e015b5c..7e44b7b1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -30,3 +30,9 @@ repos:
             additional_dependencies:
                 - types-setuptools
                 - types-requests
+    - repo: https://github.com/mgedmin/check-manifest
+      rev: "0.49"
+      hooks:
+          - id: check-manifest
+            args: [--no-build-isolation]
+            additional_dependencies: [setuptools-scm]
diff --git a/LICENSE b/LICENSE
index 3e221c2a..77f53359 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 BSD 3-Clause License
 
-Copyright (c) 2020, University College London
+Copyright (c) 2023, BrainGlobe developers.
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
diff --git a/MANIFEST.in b/MANIFEST.in
index f99c19d9..0b11081e 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,16 +1,23 @@
 include README.md
 include LICENSE
+include brainglobe_workflows/cellfinder/default_config.json
 
+exclude .pre-commit-config.yaml
 exclude *.yaml
 exclude *.yml
 exclude Dockerfile
 exclude *.ini
+exclude asv.conf.json
 
 recursive-include brainglobe_workflows *.py
-include brainglobe_workflows/config/*
+
+recursive-exclude * __pycache__
+recursive-exclude * *.py[co]
 
 global-include *.pxd
 
+prune benchmarks
+prune docs
 prune tests
 prune resources
 
diff --git a/README.md b/README.md
index b831d0dc..78db778b 100644
--- a/README.md
+++ b/README.md
@@ -1,150 +1,139 @@
-[![Python Version](https://img.shields.io/pypi/pyversions/cellfinder.svg)](https://pypi.org/project/cellfinder)
-[![PyPI](https://img.shields.io/pypi/v/cellfinder.svg)](https://pypi.org/project/cellfinder)
-[![Downloads](https://pepy.tech/badge/cellfinder)](https://pepy.tech/project/cellfinder)
-[![Wheel](https://img.shields.io/pypi/wheel/cellfinder.svg)](https://pypi.org/project/cellfinder)
-[![Development Status](https://img.shields.io/pypi/status/cellfinder.svg)](https://github.com/brainglobe/cellfinder)
-[![Tests](https://img.shields.io/github/workflow/status/brainglobe/cellfinder/tests)](
-    https://github.com/brainglobe/cellfinder/actions)
-[![codecov](https://codecov.io/gh/brainglobe/cellfinder/branch/master/graph/badge.svg?token=s3MweEFPhl)](https://codecov.io/gh/brainglobe/cellfinder)
+[![Python Version](https://img.shields.io/pypi/pyversions/brainglobe-workflows.svg)](https://pypi.org/project/brainglobe-workflows)
+[![PyPI](https://img.shields.io/pypi/v/brainglobe-workflows.svg)](https://pypi.org/project/brainglobe-workflows)
+[![Downloads](https://pepy.tech/badge/brainglobe-workflows)](https://pepy.tech/project/brainglobe-workflows)
+[![Wheel](https://img.shields.io/pypi/wheel/brainglobe-workflows.svg)](https://pypi.org/project/brainglobe-workflows)
+[![Development Status](https://img.shields.io/pypi/status/brainglobe-workflows.svg)](https://github.com/brainglobe/brainglobe-workflows)
+[![Tests](https://img.shields.io/github/workflow/status/brainglobe/brainglobe-workflows/tests)](
+    https://github.com/brainglobe/brainglobe-workflows/actions)
+[![codecov](https://codecov.io/gh/brainglobe/brainglobe-workflows/branch/master/graph/badge.svg?token=s3MweEFPhl)](https://codecov.io/gh/brainglobe/brainglobe-workflows)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black)
 [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
 [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
-[![Contributions](https://img.shields.io/badge/Contributions-Welcome-brightgreen.svg)](https://docs.brainglobe.info/cellfinder/contributing)
-[![Website](https://img.shields.io/website?up_message=online&url=https%3A%2F%2Fbrainglobe.info)](https://brainglobe.info/documentation/cellfinder/index.html)
+[![Contributions](https://img.shields.io/badge/Contributions-Welcome-brightgreen.svg)](https://brainglobe.info/developers/index.html)
+[![Website](https://img.shields.io/website?up_message=online&url=https%3A%2F%2Fbrainglobe.info)](https://brainglobe.info/documentation/brainglobe-workflows/index.html)
 [![Twitter](https://img.shields.io/twitter/follow/brain_globe?style=social)](https://twitter.com/brain_globe)
 
+# BrainGlobe Workflows
+
+`brainglobe-workflows` is a package that provides users with a number of out-of-the-box data analysis workflows employed in neuroscience, implemented using BrainGlobe tools.
+
+At present, the package offers the following workflows:
+
+- [cellfinder](#cellfinder): Whole-brain detection, registration, and analysis. The successor to the old [cellfinder CLI](TODO:permalink to deprecated cellfinder tag on repo) TODO: rename tool appropriately and give flavour text
+
+## Installation
+
+`brainglobe-workflows` comes packaged with version 1 of BrainGlobe, so the easiest way to make sure you get the latest release and stay up to date is to install that package - [follow this link to see the install instructions](TODO: link me!).
+
+If you want to install BrainGlobe workflows as a standalone tool, you can run `pip install` in your desired environment:
+
+```bash
+pip install brainglobe-workflows
+```
+
+## Contributing
+
+Contributions to BrainGlobe are more than welcome.
+Please see the [developers guide](https://brainglobe.info/developers/index.html).
+
+## Citing `brainglobe-workflows`
+
+**If you use any tools in the [brainglobe suite](https://brainglobe.info/documentation/index.html), please [let us know](mailto:code@adamltyson.com?subject=cellfinder), and we'd be happy to promote your paper/talk etc.**
+
+If you find [`cellfinder`](#cellfinder) useful, and use it in your research, please cite the paper outlining the cell detection algorithm:
+> Tyson, A. L., Rousseau, C. V., Niedworok, C. J., Keshavarzi, S., Tsitoura, C., Cossell, L., Strom, M. and Margrie, T. W. (2021) “A deep learning algorithm for 3D cell detection in whole mouse brain image datasets” PLOS Computational Biology, 17(5), e1009074
+[https://doi.org/10.1371/journal.pcbi.1009074](https://doi.org/10.1371/journal.pcbi.1009074)
+>
+If you use any of the image registration functions in `cellfinder`, please also cite [`brainreg`](https://github.com/brainglobe/brainreg#citing-brainreg).
+
+---
+
 # Cellfinder
+
+**TODO: move this information to an appropriate place on the website**
+
 Whole-brain cell detection, registration and analysis.
 
-**N.B. If you want to just use the cell detection part of cellfinder, please
-see the standalone [cellfinder-core](https://github.com/brainglobe/cellfinder-core)
-package, or the [cellfinder plugin](https://github.com/brainglobe/cellfinder-napari)
-for [napari](https://napari.org/).**
+**N.B. If you want to just use the cell detection part of cellfinder, please see the standalone [cellfinder-core](https://github.com/brainglobe/cellfinder-core) package, or the [cellfinder plugin](https://github.com/brainglobe/cellfinder-napari) for [napari](https://napari.org/).**
 
----
 `cellfinder` is a collection of tools developed by [Adam Tyson](https://github.com/adamltyson), [Charly Rousseau](https://github.com/crousseau) and [Christian Niedworok](https://github.com/cniedwor) in the [Margrie Lab](https://www.sainsburywellcome.org/web/groups/margrie-lab), generously supported by the [Sainsbury Wellcome Centre](https://www.sainsburywellcome.org/web/).
 
-`cellfinder` is a designed for the analysis of whole-brain imaging data such as
- [serial-section imaging](https://sainsburywellcomecentre.github.io/OpenSerialSection/)
- and lightsheet imaging in cleared tissue. The aim is to provide a single solution for:
-
- * Cell detection (initial cell candidate detection and refinement using
- deep learning) (using [cellfinder-core](https://github.com/brainglobe/cellfinder-core))
- * Atlas registration (using [brainreg](https://github.com/brainglobe/brainreg))
- * Analysis of cell positions in a common space
+`cellfinder` is designed for the analysis of whole-brain imaging data such as [serial-section imaging](https://sainsburywellcomecentre.github.io/OpenSerialSection/) and lightsheet imaging in cleared tissue.
+The aim is to provide a single solution for:
 
- ---
-Installation is with
-`pip install cellfinder`
+- Cell detection (initial cell candidate detection and refinement using deep learning) (using [cellfinder-core](https://github.com/brainglobe/cellfinder-core)),
+- Atlas registration (using [brainreg](https://github.com/brainglobe/brainreg)),
+- Analysis of cell positions in a common space.
 
----
 Basic usage:
+
 ```bash
 cellfinder -s signal_images -b background_images -o output_dir --metadata metadata
 ```
-Full documentation can be
-found [here](https://brainglobe.info/documentation/cellfinder/index.html).
 
-This software is at a very early stage, and was written with our data in mind.
-Over time we hope to support other data types/formats. If you have any issues, please get in touch [on the forum](https://forum.image.sc/tag/brainglobe) or by
-[raising an issue](https://github.com/brainglobe/cellfinder/issues/new/choose).
+Full documentation can be found [here](https://brainglobe.info/documentation/cellfinder/index.html).
 
+This software is at a very early stage, and was written with our data in mind.
+Over time we hope to support other data types/formats.
+If you have any issues, please get in touch [on the forum](https://forum.image.sc/tag/brainglobe) or by [raising an issue](https://github.com/brainglobe/cellfinder/issues/new/choose).
 
----
 ## Illustration
 
 ### Introduction
-cellfinder takes a stitched, but otherwise raw whole-brain dataset with at least
-two channels:
- * Background channel (i.e. autofluorescence)
- * Signal channel, the one with the cells to be detected:
 
-![raw](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/raw.png)
-**Raw coronal serial two-photon mouse brain image showing labelled cells**
+cellfinder takes a stitched, but otherwise raw whole-brain dataset with at least two channels:
+
+- Background channel (i.e. autofluorescence),
+- Signal channel, the one with the cells to be detected:
 
+![Raw coronal serial two-photon mouse brain image showing labelled cells](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/raw.png)
 
 ### Cell candidate detection
-Classical image analysis (e.g. filters, thresholding) is used to find
-cell-like objects (with false positives):
 
-![raw](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/detect.png)
-**Candidate cells (including many artefacts)**
+Classical image analysis (e.g. filters, thresholding) is used to find cell-like objects (with false positives):
 
+![Candidate cells (including many artefacts)](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/detect.png)
 
 ### Cell candidate classification
-A deep-learning network (ResNet) is used to classify cell candidates as true
-cells or artefacts:
 
-![raw](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/classify.png)
-**Cassified cell candidates. Yellow - cells, Blue - artefacts**
+A deep-learning network (ResNet) is used to classify cell candidates as true cells (yellow) or artefacts (blue):
+
+![Classified cell candidates. Yellow - cells, Blue - artefacts](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/classify.png)
 
-### Registration and segmentation (brainreg)
-Using [brainreg](https://github.com/brainglobe/brainreg),
-cellfinder aligns a template brain and atlas annotations (e.g.
-the Allen Reference Atlas, ARA) to the sample allowing detected cells to be assigned
-a brain region.
+### Registration and segmentation (`brainreg`)
 
-This transformation can be inverted, allowing detected cells to be
-transformed to a standard anatomical space.
+Using [`brainreg`](https://github.com/brainglobe/brainreg), `cellfinder` aligns a template brain and atlas annotations (e.g. the Allen Reference Atlas, ARA) to the sample allowing detected cells to be assigned a brain region.
 
-![raw](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/register.png)
-**ARA overlaid on sample image**
+This transformation can be inverted, allowing detected cells to be transformed to a standard anatomical space.
+
+![ARA overlaid on sample image](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/register.png)
 
 ### Analysis of cell positions in a common anatomical space
-Registration to a template allows for powerful group-level analysis of cellular
-disributions. *(Example to come)*
+
+Registration to a template allows for powerful group-level analysis of cellular distributions.
+*(Example to come)*
 
 ## Examples
+
 *(more to come)*
 
 ### Tracing of inputs to retrosplenial cortex (RSP)
-Input cell somas detected by cellfinder, aligned to the Allen Reference Atlas,
-and visualised in [brainrender](https://github.com/brainglobe/brainrender) along
+
+Input cell somas detected by cellfinder, aligned to the Allen Reference Atlas, and visualised in [brainrender](https://github.com/brainglobe/brainrender) along
 with RSP.
 
 ![brainrender](https://raw.githubusercontent.com/brainglobe/cellfinder/master/resources/brainrender.png)
 
-Data courtesy of Sepiedeh Keshavarzi and Chryssanthi Tsitoura. [Details here](https://www.youtube.com/watch?v=pMHP0o-KsoQ)
+Data courtesy of Sepiedeh Keshavarzi and Chryssanthi Tsitoura.
+[Details here](https://www.youtube.com/watch?v=pMHP0o-KsoQ)
 
 ## Visualisation
 
-cellfinder comes with a plugin ([brainglobe-napari-io](https://github.com/brainglobe/brainglobe-napari-io)) for [napari](https://github.com/napari/napari) to view your data
-
-#### Usage
-* Open napari (however you normally do it, but typically just type `napari` into your terminal, or click on your desktop icon)
-
-#### Load cellfinder XML file
-* Load your raw data (drag and drop the data directories into napari, one at a time)
-* Drag and drop your cellfinder XML file (e.g. `cell_classification.xml`) into napari.
+You can view your data using the [brainglobe-napari-io](https://github.com/brainglobe/brainglobe-napari-io) plugin for [napari](https://github.com/napari/napari).
 
-#### Load cellfinder directory
-* Load your raw data (drag and drop the data directories into napari, one at a time)
-* Drag and drop your cellfinder output directory into napari.
-
-The plugin will then load your detected cells (in yellow) and the rejected cell
-candidates (in blue). If you carried out registration, then these results will be
-overlaid (similarly to the loading brainreg data, but transformed to the
-coordinate space of your raw data).
-
-![load_data](https://raw.githubusercontent.com/brainglobe/brainglobe-napari-io/master/resources/load_data.gif)
-**Loading raw data**
-
-![load_data](https://raw.githubusercontent.com/brainglobe/brainglobe-napari-io/master/resources/load_results.gif)
-**Loading cellfinder results**
-
-
-## Contributing
-Contributions to cellfinder are more than welcome. Please see the [developers guide](https://brainglobe.info/developers/index.html).
-
-
-## Citing cellfinder
-
-If you find cellfinder useful, and use it in your research, please cite the paper outlining the cell detection algorithm:
-> Tyson, A. L., Rousseau, C. V., Niedworok, C. J., Keshavarzi, S., Tsitoura, C., Cossell, L., Strom, M. and Margrie, T. W. (2021) “A deep learning algorithm for 3D cell detection in whole mouse brain image datasets’ PLOS Computational Biology, 17(5), e1009074
-[https://doi.org/10.1371/journal.pcbi.1009074](https://doi.org/10.1371/journal.pcbi.1009074)
->
-If you use any of the image registration functions in cellfinder, please also cite [brainreg](https://github.com/brainglobe/brainreg#citing-brainreg).
+- Open napari (however you normally do it, but typically just type `napari` into your terminal, or click on your desktop icon).
+- Load your raw data (drag and drop the data directories into napari, one at a time). ![Loading raw data](https://raw.githubusercontent.com/brainglobe/brainglobe-napari-io/master/resources/load_data.gif)
+- Drag and drop your cellfinder XML file (e.g. `cell_classification.xml`) and/or cellfinder output directory into napari. ![Loading cellfinder results](https://raw.githubusercontent.com/brainglobe/brainglobe-napari-io/master/resources/load_results.gif)
 
-**If you use this, or any other tools in the brainglobe suite, please
- [let us know](mailto:code@adamltyson.com?subject=cellfinder), and
- we'd be happy to promote your paper/talk etc.**
+The plugin will then load your detected cells (in yellow) and the rejected cell candidates (in blue).
+If you carried out registration, then these results will be overlaid (similarly to loading `brainreg` data, but transformed to the coordinate space of your raw data).
diff --git a/asv.conf.json b/asv.conf.json
new file mode 100644
index 00000000..d620a545
--- /dev/null
+++ b/asv.conf.json
@@ -0,0 +1,194 @@
+{
+    // The version of the config file format.  Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+
+    // The name of the project being benchmarked
+    "project": "brainglobe_workflows",
+
+    // The project's homepage
+    "project_url": "https://github.com/brainglobe/brainglobe-workflows",
+
+    // The URL or local path of the source code repository for the
+    // project being benchmarked
+    "repo": ".",
+
+    // The Python project's subdirectory in your repo.  If missing or
+    // the empty string, the project is assumed to be located at the root
+    // of the repository.
+    // "repo_subdir": "",
+
+    // Customizable commands for building the project.
+    // See asv.conf.json documentation.
+    // Build the package using pyproject.toml (PEP 518); these commands are active.
+    "build_command": [
+        "python -m pip install build",
+        "python -m build",
+        "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
+    ],
+    // To build the package using setuptools and a setup.py file, uncomment the following lines
+    // "build_command": [
+    //     "python setup.py build",
+    //     "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
+    // ],
+
+    // Customizable commands for installing and uninstalling the project.
+    // See asv.conf.json documentation.
+    "install_command": ["in-dir={env_dir} python -mpip install --force-reinstall {wheel_file}"],
+    "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
+
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "default" (for mercurial).
+    "branches": ["main"], // for git
+    // "branches": ["default"],    // for mercurial
+
+    // The DVCS being used.  If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    // "dvcs": "git",
+
+    // The tool to use to create environments.  May be "conda",
+    // "virtualenv", "mamba" (above 3.8)
+    // or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "conda",
+
+    // timeout in seconds for installing any dependencies in environment
+    // defaults to 10 min
+    //"install_timeout": 600,
+
+    // the base URL to show a commit for the project.
+    "show_commit_url": "https://github.com/brainglobe/brainglobe-workflows/commit/",
+
+    // The Pythons you'd like to test against.  If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    "pythons": ["3.10"],
+
+    // The list of conda channel names to be searched for benchmark
+    // dependency packages in the specified order
+    "conda_channels": ["conda-forge", "defaults"],
+
+    // A conda environment file that is used for environment creation.
+    // "conda_environment_file": "environment.yml",
+
+    // The matrix of dependencies to test.  Each key of the "req"
+    // requirements dictionary is the name of a package (in PyPI) and
+    // the values are version numbers.  An empty list or empty string
+    // indicates to just test against the default (latest)
+    // version. null indicates that the package is to not be
+    // installed. If the package to be tested is only available from
+    // PyPi, and the 'environment_type' is conda, then you can preface
+    // the package name by 'pip+', and the package will be installed
+    // via pip (with all the conda available packages installed first,
+    // followed by the pip installed packages).
+    //
+    // The ``@env`` and ``@env_nobuild`` keys contain the matrix of
+    // environment variables to pass to build and benchmark commands.
+    // An environment will be created for every combination of the
+    // cartesian product of the "@env" variables in this matrix.
+    // Variables in "@env_nobuild" will be passed to every environment
+    // during the benchmark phase, but will not trigger creation of
+    // new environments.  A value of ``null`` means that the variable
+    // will not be set for the current combination.
+    //
+    // "matrix": {
+    //     "req": {
+    //         "numpy": ["1.6", "1.7"],
+    //         "six": ["", null],  // test with and without six installed
+    //         "pip+emcee": [""]   // emcee is only available for install with pip.
+    //     },
+    //     "env": {"ENV_VAR_1": ["val1", "val2"]},
+    //     "env_nobuild": {"ENV_VAR_2": ["val3", null]},
+    // },
+
+    // Combinations of libraries/python versions can be excluded/included
+    // from the set to test. Each entry is a dictionary containing additional
+    // key-value pairs to include/exclude.
+    //
+    // An exclude entry excludes entries where all values match. The
+    // values are regexps that should match the whole string.
+    //
+    // An include entry adds an environment. Only the packages listed
+    // are installed. The 'python' key is required. The exclude rules
+    // do not apply to includes.
+    //
+    // In addition to package names, the following keys are available:
+    //
+    // - python
+    //     Python version, as in the *pythons* variable above.
+    // - environment_type
+    //     Environment type, as above.
+    // - sys_platform
+    //     Platform, as in sys.platform. Possible values for the common
+    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
+    // - req
+    //     Required packages
+    // - env
+    //     Environment variables
+    // - env_nobuild
+    //     Non-build environment variables
+    //
+    // "exclude": [
+    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
+    //     {"environment_type": "conda", "req": {"six": null}}, // don't run without six on conda
+    //     {"env": {"ENV_VAR_1": "val2"}}, // skip val2 for ENV_VAR_1
+    // ],
+    //
+    // "include": [
+    //     // additional env for python2.7
+    //     {"python": "2.7", "req": {"numpy": "1.8"}, "env_nobuild": {"FOO": "123"}},
+    //     // additional env if run on windows+conda
+    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "req": {"libpython": ""}},
+    // ],
+
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in.  If not provided, defaults to "benchmarks"
+    // "benchmark_dir": "benchmarks",
+
+    // The directory (relative to the current directory) to cache the Python
+    // environments in.  If not provided, defaults to "env"
+    "env_dir": ".asv/env",
+
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in.  If not provided, defaults to "results".
+    "results_dir": "benchmarks/results",
+
+    // The directory (relative to the current directory) that the html tree
+    // should be written to.  If not provided, defaults to "html".
+    "html_dir": "benchmarks/html",
+
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+
+    // `asv` will cache results of the recent builds in each
+    // environment, making them faster to install next time.  This is
+    // the number of builds to keep, per environment.
+    "build_cache_size": 2,
+
+    // The commits after which the regression search in `asv publish`
+    // should start looking for regressions. Dictionary whose keys are
+    // regexps matching to benchmark names, and values corresponding to
+    // the commit (exclusive) after which to start looking for
+    // regressions.  The default is to start from the first commit
+    // with results. If the commit is `null`, regression detection is
+    // skipped for the matching benchmark.
+    //
+    // "regressions_first_commits": {
+    //    "some_benchmark": "352cdf",  // Consider regressions only after this commit
+    //    "another_benchmark": null,   // Skip regression detection altogether
+    // },
+
+    // The thresholds for relative change in results, after which `asv
+    // publish` starts reporting regressions. Dictionary of the same
+    // form as in ``regressions_first_commits``, with values
+    // indicating the thresholds.  If multiple entries match, the
+    // maximum is taken. If no entry matches, the default is 5%.
+    //
+    // "regressions_thresholds": {
+    //    "some_benchmark": 0.01,     // Threshold of 1%
+    //    "another_benchmark": 0.5,   // Threshold of 50%
+    // },
+}
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/cellfinder.py b/benchmarks/cellfinder.py
new file mode 100644
index 00000000..76d364bc
--- /dev/null
+++ b/benchmarks/cellfinder.py
@@ -0,0 +1,227 @@
+import json
+import shutil
+from pathlib import Path
+
+import pooch
+from brainglobe_utils.IO.cells import save_cells
+from cellfinder_core.main import main as cellfinder_run
+from cellfinder_core.tools.IO import read_with_dask
+
+from brainglobe_workflows.cellfinder.cellfinder_main import (
+    DEFAULT_JSON_CONFIG_PATH,
+    CellfinderConfig,
+    run_workflow_from_cellfinder_run,
+)
+from brainglobe_workflows.cellfinder.cellfinder_main import (
+    setup as setup_cellfinder_workflow,
+)
+
+
+class TimeBenchmarkPrepGIN:
+    """
+
+    A base class for timing benchmarks for the cellfinder workflow.
+
+    It includes:
+    - a setup_cache function that downloads the GIN data specified in the
+      default_config.json to a local directory (created by asv). This function
+      runs only once before all repeats of the benchmark.
+    - a setup function, that runs the setup steps for the workflow.
+    - a teardown function, that removes the output directory.
+
+    Notes
+    -----
+    The class includes some predefined attributes for timing benchmarks. For
+    the full list see
+    https://asv.readthedocs.io/en/stable/benchmarks.html#benchmark-attributes
+
+    Some asv benchmarking nomenclature:
+    - repeat: a benchmark repeat is made up of the following steps:
+      1- the `setup` is run,
+      2- then the timed benchmark routine is called for `n` iterations, and
+      3- finally the teardown function is run.
+      Each repeat generates a sample, which is the average time that the
+      routine took across all iterations. A new process is started for each
+      repeat of each benchmark. A calibration phase before running the repeat
+      computes the number of iterations that will be executed. Each benchmark
+      is run for a number of repeats. The setup_cache function is run only once
+      for all repeats of a benchmark (but it is discarded before the next
+      benchmark). By default `repeat` is set to 0, which means:
+        - if rounds==1 the default is
+            (min_repeat, max_repeat, max_time) = (1, 10, 20.0),
+        - if rounds != 1 the default is
+            (min_repeat, max_repeat, max_time) = (1, 5, 10.0)
+
+    - iterations (`number`): the number of iterations in each sample. Note that
+      `setup` and `teardown` are not run between iterations. asv will
+      automatically select the number of iterations so that each sample takes
+      approximately `sample_time` seconds.
+
+    - round: at each round, each benchmark is run for the specified number of
+      repeats. The idea is that we sample each benchmark over longer periods of
+      background performance variations.
+
+    - warmup time: asv will spend this time (in seconds) in calling the
+      benchmarked function repeatedly, before starting to run the actual
+      benchmark. If not specified, warmup_time defaults to 0.1 seconds
+
+    """
+
+    # Timing attributes
+    timeout = 600  # default: 60 s
+    # benchmark version. Default: None (i.e. hash of source code)
+    version = None
+    warmup_time = 0.1  # seconds
+    rounds = 2
+    repeat = 0
+    sample_time = 0.01  # default: 10 ms = 0.01 s
+    min_run_count = 2  # default: 2
+
+    # Custom attributes
+    input_config_path = str(DEFAULT_JSON_CONFIG_PATH)
+
+    def setup_cache(self):
+        """
+        Download the input data from the GIN repository to the local
+        directory specified in the default_config.json
+
+        Notes
+        -----
+        The `setup_cache` method only performs the computations once
+        per benchmark round and then caches the result to disk [1]_. It cannot
+        be parametrised [2]_.
+
+
+        [1] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#setup-and-teardown-functions
+        [2] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#parameterized-benchmarks
+        """
+
+        # Check config file exists
+        assert Path(self.input_config_path).exists()
+
+        # Instantiate a CellfinderConfig from the input json file
+        # (assumes config is json serializable)
+        with open(self.input_config_path) as cfg:
+            config_dict = json.load(cfg)
+        config = CellfinderConfig(**config_dict)
+
+        # Download data with pooch
+        _ = pooch.retrieve(
+            url=config.data_url,
+            known_hash=config.data_hash,
+            path=config.install_path,
+            progressbar=True,
+            processor=pooch.Unzip(extract_dir=config.extract_dir_relative),
+        )
+
+        # Check the input data now exists. The `*_dir_path` attributes of
+        # the config default to an empty string until the workflow setup
+        # fills them in, so build the directories from their components
+        data_dir = Path(config.install_path) / config.extract_dir_relative
+        assert (data_dir / config.signal_subdir).exists()
+        assert (data_dir / config.background_subdir).exists()
+
+    def setup(self):
+        """
+        Run the cellfinder workflow setup steps.
+
+        The command line input arguments are injected as dependencies.
+        """
+
+        # Run setup
+        cfg = setup_cellfinder_workflow(
+            [
+                "--config",
+                self.input_config_path,
+            ]
+        )
+
+        # Save configuration as attribute
+        self.cfg = cfg
+
+    def teardown(self):
+        """
+        Remove the cellfinder output directory.
+
+        The input data is kept for all repeats of the same benchmark,
+        to avoid repeated downloads from GIN.
+        """
+        shutil.rmtree(Path(self.cfg.output_path).resolve())
+
+
+class TimeFullWorkflow(TimeBenchmarkPrepGIN):
+    """
+    Time the full cellfinder workflow.
+
+    It includes reading the signal and background arrays with dask,
+    detecting the cells and saving the results to an XML file.
+
+    Notes
+    -----
+    Inherits `setup_cache`, `setup` and `teardown` from
+    TimeBenchmarkPrepGIN; `self.cfg` is the config set in `setup`.
+    """
+
+    def time_workflow_from_cellfinder_run(self):
+        run_workflow_from_cellfinder_run(self.cfg)
+
+
+class TimeReadInputDask(TimeBenchmarkPrepGIN):
+    """
+    Time reading the input data with dask.
+
+    Notes
+    -----
+    The signal and background directories are timed separately;
+    both paths come from the config stored in `self.cfg` by `setup`.
+    """
+
+    def time_read_signal_with_dask(self):
+        read_with_dask(self.cfg.signal_dir_path)
+
+    def time_read_background_with_dask(self):
+        read_with_dask(self.cfg.background_dir_path)
+
+
+class TimeDetectCells(TimeBenchmarkPrepGIN):
+    """
+    Time the main cell detection pipeline (`cellfinder_run`).
+
+    Notes
+    -----
+    Reading the input arrays with dask happens in `setup`,
+    so it is not part of the timed routine.
+    """
+
+    def setup(self):
+        # run the base-class setup first: it sets self.cfg
+        super().setup()
+
+        # read the input data as arrays and keep them as attributes
+        cfg = self.cfg
+        self.signal_array = read_with_dask(cfg.signal_dir_path)
+        self.background_array = read_with_dask(cfg.background_dir_path)
+
+    def time_cellfinder_run(self):
+        cellfinder_run(
+            self.signal_array, self.background_array, self.cfg.voxel_sizes
+        )
+
+
+class TimeSaveCells(TimeBenchmarkPrepGIN):
+    """Time saving the detected cells (`save_cells`)."""
+
+    def setup(self):
+        # base-class setup sets self.cfg
+        super().setup()
+
+        # read the input arrays and detect the cells (not timed)
+        signal = read_with_dask(self.cfg.signal_dir_path)
+        background = read_with_dask(self.cfg.background_dir_path)
+        self.signal_array, self.background_array = signal, background
+        self.detected_cells = cellfinder_run(
+            signal, background, self.cfg.voxel_sizes
+        )
+
+    def time_save_cells(self):
+        save_cells(self.detected_cells, self.cfg.detected_cells_path)
diff --git a/brainglobe_workflows/__init__.py b/brainglobe_workflows/__init__.py
index 9f726d43..5fd437c6 100644
--- a/brainglobe_workflows/__init__.py
+++ b/brainglobe_workflows/__init__.py
@@ -1,7 +1,11 @@
-from importlib.metadata import metadata
+from importlib.metadata import PackageNotFoundError, metadata
 
-__version__ = metadata("brainglobe_workflows")["version"]
-__author__ = metadata("brainglobe_workflows")["author-email"]
-__license__ = metadata("brainglobe_workflows")["license"]
+try:
+    __version__ = metadata("brainglobe_workflows")["version"]
+    __author__ = metadata("brainglobe_workflows")["author-email"]
+    __license__ = metadata("brainglobe_workflows")["license"]
+except PackageNotFoundError:
+    # Package not installed: __version__ & co. are left undefined
+    pass
 
 del metadata
diff --git a/brainglobe_workflows/cellfinder/__init__.py b/brainglobe_workflows/cellfinder/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/brainglobe_workflows/cellfinder/cellfinder_main.py b/brainglobe_workflows/cellfinder/cellfinder_main.py
new file mode 100644
index 00000000..fd19db34
--- /dev/null
+++ b/brainglobe_workflows/cellfinder/cellfinder_main.py
@@ -0,0 +1,404 @@
+"""This script reproduces the most common cellfinder workflow
+
+It receives as an (optional) command line input the path to a configuration
+json file, that holds the values of the required parameters for the workflow.
+
+If no input json file is passed as a configuration, the default
+configuration defined at brainglobe_workflows/cellfinder/default_config.json
+is used.
+
+Example usage:
+ - to pass a custom configuration, run (from the cellfinder_main.py
+   parent directory):
+    python cellfinder_main.py --config path/to/input/config.json
+ - to use the default configuration, run
+    python cellfinder_main.py
+
+
+"""
+
+import argparse
+import datetime
+import json
+import logging
+import os
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional, Tuple, Union
+
+import pooch
+from brainglobe_utils.IO.cells import save_cells
+from cellfinder_core.main import main as cellfinder_run
+from cellfinder_core.tools.IO import read_with_dask
+from cellfinder_core.train.train_yml import depth_type
+
+Pathlike = Union[str, os.PathLike]
+
+DEFAULT_JSON_CONFIG_PATH = (
+    Path(__file__).resolve().parent / "default_config.json"
+)
+
+
+@dataclass
+class CellfinderConfig:
+    """
+    Define input and output data locations, and the parameters for
+    the cellfinder preprocessing steps.
+    """
+
+    # cache directory: downloads and outputs are placed under it
+    install_path: Pathlike
+
+    # subdirectories and file names, combined with install_path in setup
+    extract_dir_relative: Pathlike
+    signal_subdir: str
+    background_subdir: str
+    output_path_basename_relative: Pathlike
+    detected_cells_filename: Pathlike
+
+    # preprocessing parameters
+    voxel_sizes: Tuple[float, float, float]
+    start_plane: int
+    end_plane: int
+    trained_model: Optional[
+        os.PathLike
+    ]  # if None, it will use a default model
+    model_weights: Optional[os.PathLike]
+    model: str
+    batch_size: int
+    n_free_cpus: int
+    network_voxel_sizes: Tuple[int, int, int]
+    soma_diameter: int
+    ball_xy_size: int
+    ball_z_size: int
+    ball_overlap_fraction: float
+    log_sigma_size: float
+    n_sds_above_mean_thresh: int
+    soma_spread_factor: float
+    max_cluster_size: int
+    cube_width: int
+    cube_height: int
+    cube_depth: int
+    network_depth: depth_type
+
+    # origin of data to download (if required)
+    data_url: Optional[str] = None
+    data_hash: Optional[str] = None
+
+    # The following attributes are added
+    # during the setup phase of the workflow
+    list_signal_files: Optional[list] = None
+    list_background_files: Optional[list] = None
+    output_path: Pathlike = ""
+    signal_dir_path: Pathlike = ""
+    background_dir_path: Pathlike = ""
+    detected_cells_path: Pathlike = ""
+
+
+def setup(argv=None) -> CellfinderConfig:
+    """Run the setup steps of the cellfinder workflow.
+
+    Parses `argv` (sys.argv[1:] if None), sets up the logger and
+    returns the CellfinderConfig built from the input config file.
+    """
+
+    def parse_cli_arguments(argv_) -> argparse.Namespace:
+        """Define argument parser for cellfinder workflow script.
+
+        It expects a path to a json file with the parameters
+        required to run the workflow. If none is provided,
+        the default config (DEFAULT_JSON_CONFIG_PATH) is used.
+
+        Returns
+        -------
+        args : argparse.Namespace
+            command line input arguments parsed
+        """
+        # initialise argument parser
+        parser = argparse.ArgumentParser(
+            description=(
+                "To launch the workflow with "
+                "a specific set of input parameters, run: "
+                "`python cellfinder_main.py --config path/to/config.json`"
+                " where path/to/config.json is the json file "
+                "containing the workflow parameters."
+            )
+        )
+        # add arguments
+        parser.add_argument(
+            "-c",
+            "--config",
+            default=str(DEFAULT_JSON_CONFIG_PATH),
+            type=str,
+            metavar="CONFIG",  # a name for usage messages
+            help="path to the json file with the workflow parameters",
+        )
+
+        # build parser object
+        args = parser.parse_args(argv_)
+
+        # print error if required arguments not provided
+        if not args.config:
+            logger.error("Paths to input config not provided.")
+            parser.print_help()
+
+        return args
+
+    def setup_logger() -> logging.Logger:
+        """Setup a logger for this script
+
+        The logger's level is set to DEBUG, and it
+        is linked to a handler that writes to the
+        console (stdout), whose own level is left unset.
+
+        Returns
+        -------
+        logging.Logger
+            a logger object
+        """
+        # define handler that writes to stdout
+        console_handler = logging.StreamHandler(sys.stdout)
+        console_handler.setFormatter(
+            logging.Formatter("%(name)s %(levelname)s: %(message)s")
+        )
+
+        # define logger (named after the module if imported) and link to handler
+        logger = logging.getLogger(__name__)
+        logger.setLevel(logging.DEBUG)
+        # setup() may be re-run: attach at most one handler per stdout stream
+        streams = [getattr(h, "stream", None) for h in logger.handlers]
+        if sys.stdout not in streams:
+            logger.addHandler(console_handler)
+        return logger
+
+    def setup_workflow(input_config_path: Path) -> CellfinderConfig:
+        """Run setup steps prior to executing the workflow
+
+        These setup steps include:
+        - instantiating a CellfinderConfig object with the required parameters,
+        - checking if the input data exists locally, and fetching from
+        GIN repository otherwise,
+        - adding the path to the input data files to the config, and
+        - creating a timestamped directory for the output of the workflow if
+        it doesn't exist and adding its path to the config
+
+        Parameters
+        ----------
+        input_config_path : Path
+            path to the input config file
+
+        Returns
+        -------
+        config : CellfinderConfig
+            a dataclass with the parameters for running cellfinder.
+        """
+
+        # Check config file exists
+        assert input_config_path.exists()
+
+        # Instantiate a CellfinderConfig from the input json file
+        # (assumes config is json serializable)
+        with open(input_config_path) as cfg:
+            config_dict = json.load(cfg)
+        config = CellfinderConfig(**config_dict)
+
+        # Print info logs for status
+        logger.info(f"Input config read from {input_config_path}")
+        if input_config_path == DEFAULT_JSON_CONFIG_PATH:
+            logger.info("Using default config file")
+
+        # Retrieve and add lists of input data to the config,
+        # if they are not both defined yet
+        if not (config.list_signal_files and config.list_background_files):
+            # build fullpaths to inputs
+            config.signal_dir_path = str(
+                Path(config.install_path)
+                / config.extract_dir_relative
+                / config.signal_subdir
+            )
+            config.background_dir_path = str(
+                Path(config.install_path)
+                / config.extract_dir_relative
+                / config.background_subdir
+            )
+            # retrieve data
+            config = retrieve_input_data(config)
+
+        # Create timestamped output directory if it doesn't exist
+        timestamp = datetime.datetime.now()
+        timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S")
+        output_path_timestamped = Path(config.install_path) / (
+            str(config.output_path_basename_relative) + timestamp_formatted
+        )
+        output_path_timestamped.mkdir(parents=True, exist_ok=True)
+
+        # Add output path and output file path to config
+        config.output_path = output_path_timestamped
+        config.detected_cells_path = (
+            config.output_path / config.detected_cells_filename
+        )
+
+        return config
+
+    def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
+        """
+        Adds the lists of input data files (signal and background)
+        to the config.
+
+        It first checks if the input data exists locally.
+        - If both directories (signal and background) exist, the lists of
+        signal and background files are added to the config.
+        - If exactly one of the input data directories is missing, an error
+        message is logged.
+        - If neither of them exist, the data is retrieved from the provided GIN
+        repository. If no URL or hash to GIN is provided, an error is shown.
+
+        Parameters
+        ----------
+        config : CellfinderConfig
+            a dataclass with the parameters for running cellfinder.
+
+        Returns
+        -------
+        config : CellfinderConfig
+            a dataclass with the parameters for running cellfinder.
+        """
+        # Check if input data (signal and background) exist locally.
+        # If both directories exist, get list of signal and background files
+        if (
+            Path(config.signal_dir_path).exists()
+            and Path(config.background_dir_path).exists()
+        ):
+            logger.info("Fetching input data from the local directories")
+
+            config.list_signal_files = [
+                f
+                for f in Path(config.signal_dir_path).resolve().iterdir()
+                if f.is_file()
+            ]
+            config.list_background_files = [
+                f
+                for f in Path(config.background_dir_path).resolve().iterdir()
+                if f.is_file()
+            ]
+
+        # If exactly one of the input data directories is missing, print error
+        elif (
+            Path(config.signal_dir_path).resolve().exists()
+            or Path(config.background_dir_path).resolve().exists()
+        ):
+            if not Path(config.signal_dir_path).resolve().exists():
+                logger.error(
+                    f"The directory {config.signal_dir_path} does not exist"
+                )
+            else:
+                logger.error(
+                    f"The directory {config.background_dir_path} "
+                    "does not exist"
+                )
+
+        # If neither of them exist, retrieve data from GIN repository
+        else:
+            # check if GIN URL and hash are defined (log error otherwise)
+            if (not config.data_url) or (not config.data_hash):
+                logger.error(
+                    "Input data not found locally, and URL/hash to "
+                    "GIN repository not provided"
+                )
+
+            else:
+                # get list of files in GIN archive with pooch.retrieve
+                list_files_archive = pooch.retrieve(
+                    url=config.data_url,
+                    known_hash=config.data_hash,
+                    path=config.install_path,  # zip will be downloaded here
+                    progressbar=True,
+                    processor=pooch.Unzip(
+                        extract_dir=config.extract_dir_relative
+                        # unzip dir, *relative* to the path set in 'path'
+                    ),
+                )
+                logger.info(
+                    "Fetching input data from the provided GIN repository"
+                )
+
+                # Check signal and background parent directories exist now
+                assert Path(config.signal_dir_path).resolve().exists()
+                assert Path(config.background_dir_path).resolve().exists()
+
+                # Add signal files to config
+                config.list_signal_files = [
+                    f
+                    for f in list_files_archive
+                    if f.startswith(
+                        str(Path(config.signal_dir_path).resolve())
+                    )  # if str(config.signal_dir_path) in f
+                ]
+
+                # Add background files to config
+                config.list_background_files = [
+                    f
+                    for f in list_files_archive
+                    if f.startswith(
+                        str(Path(config.background_dir_path).resolve())
+                    )  # if str(config.background_dir_path) in f
+                ]
+
+        return config
+
+    # setup logger first: the nested helpers log through it
+    logger = setup_logger()
+
+    # parse command line input arguments (sys.argv except when testing);
+    # see https://paiml.com/docs/home/books/testing-in-python/chapter08-monkeypatching/#the-simplest-monkeypatching
+    argv = sys.argv[1:] if argv is None else argv
+    args = parse_cli_arguments(argv)
+
+    # run setup steps and return config
+    cfg = setup_workflow(Path(args.config))
+
+    return cfg
+
+
+def run_workflow_from_cellfinder_run(cfg: CellfinderConfig):
+    """
+    Run the workflow built around cellfinder_core.main.main().
+
+    It performs three steps:
+    1. Read the input signal and background data from their
+       directories, each with a call to `read_with_dask`.
+    2. Run the main cellfinder pipeline on the two arrays read,
+       with the voxel sizes defined in the input configuration (cfg).
+    3. Save the detected cells to the file path specified in
+       the input configuration (cfg).
+
+    Only the voxel sizes in cfg are passed on to the cellfinder pipeline.
+
+    Parameters
+    ----------
+    cfg : CellfinderConfig
+        a dataclass with the input data directories, the voxel
+        sizes and the path to the output file for the workflow
+
+    Returns
+    -------
+    None
+    """
+    # Step 1: read input data
+    signal = read_with_dask(cfg.signal_dir_path)
+    background = read_with_dask(cfg.background_dir_path)
+
+    # Step 2: run main analysis using `cellfinder_run`
+    cells = cellfinder_run(signal, background, cfg.voxel_sizes)
+
+    # Step 3: save results to file
+    save_cells(cells, cfg.detected_cells_path)
+
+
+if __name__ == "__main__":
+    # Script entry point: build the config from the command line
+    # arguments, then run the workflow on it. Only the workflow
+    # call (not the setup) will be benchmarked.
+    cfg = setup()
+    run_workflow_from_cellfinder_run(cfg)
diff --git a/brainglobe_workflows/cellfinder/default_config.json b/brainglobe_workflows/cellfinder/default_config.json
new file mode 100644
index 00000000..a80a4ba4
--- /dev/null
+++ b/brainglobe_workflows/cellfinder/default_config.json
@@ -0,0 +1,39 @@
+{
+  "install_path": ".cellfinder_workflows",
+  "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
+  "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",
+  "extract_dir_relative": "cellfinder_test_data",
+  "signal_subdir": "signal",
+  "background_subdir": "background",
+  "output_path_basename_relative": "cellfinder_output_",
+  "detected_cells_filename": "detected_cells.xml",
+  "voxel_sizes": [
+    5,
+    2,
+    2
+  ],
+  "start_plane": 0,
+  "end_plane": -1,
+  "trained_model": null,
+  "model_weights": null,
+  "model": "resnet50_tv",
+  "batch_size": 32,
+  "n_free_cpus": 2,
+  "network_voxel_sizes": [
+    5,
+    1,
+    1
+  ],
+  "soma_diameter": 16,
+  "ball_xy_size": 6,
+  "ball_z_size": 15,
+  "ball_overlap_fraction": 0.6,
+  "log_sigma_size": 0.2,
+  "n_sds_above_mean_thresh": 10,
+  "soma_spread_factor": 1.4,
+  "max_cluster_size": 100000,
+  "cube_width": 50,
+  "cube_height": 50,
+  "cube_depth": 20,
+  "network_depth": "50"
+}
diff --git a/pyproject.toml b/pyproject.toml
index e387c824..18bc443b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,29 +1,29 @@
 [project]
 name = "brainglobe-workflows"
-description = "Automated 3D cell detection and registration of whole-brain images"
-readme = "README.md"
-license = { file = "LICENSE" }
-requires-python = ">=3.9"
 authors = [
     { name = "Adam Tyson", email = "code@adamltyson.com" },
+    { name = "BrainGlobe developers", email = "code@adamltyson.com" },
     { name = "Christian Niedworok" },
     { name = "Charly Rousseau" },
 ]
+description = "A collection of end-to-end data analysis workflows executed using BrainGlobe tools."
+readme = "README.md"
+license = { file = "LICENSE" }
+requires-python = ">=3.9"
 classifiers = [
     "Development Status :: 3 - Alpha",
-    "Operating System :: POSIX :: Linux",
-    "Operating System :: Microsoft :: Windows :: Windows 10",
-    "Programming Language :: Python",
-    "Programming Language :: Python :: 3.8",
-    "Programming Language :: Python :: 3.9",
-    "Programming Language :: Python :: 3.10",
-    "Topic :: Scientific/Engineering :: Image Recognition",
     "Intended Audience :: Developers",
     "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: BSD License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python",
+    "Topic :: Scientific/Engineering :: Image Recognition",
 ]
 dependencies = [
     "brainreg>=1.0.0",
-    "cellfinder-core>=0.2.4",
+    "cellfinder-core>=0.2.4,<1.0.0",
     "configobj",
     "fancylog>=0.0.7",
     "imio",
@@ -43,6 +43,9 @@ dynamic = ["version"]
 cellfinder = "brainglobe_workflows.main:main"
 
 [project.optional-dependencies]
+benchmark = [
+    "pooch",
+] # Depending on how asv builds wheels, we may need to play with asv.config.json to force it to include this optional dependency.
 dev = [
     "black",
     "pytest-cov",
@@ -52,24 +55,34 @@ dev = [
     "pre-commit",
     "setuptools_scm",
 ]
-napari = ["napari[pyside2]", "brainglobe-napari-io", "cellfinder-napari"]
+napari = ["napari[pyside2]", "brainglobe-napari-io", "cellfinder-napari<1.0.0"]
 
 [project.urls]
-source = "https://github.com/brainglobe/brainglobe-workflows"
-bug_tracker = "https://github.com/brainglobe/brainglobe-workflows/issues"
-homepage = "https://brainglobe.info"
-documentation = "https://brainglobe.info/documentation/brainglobe-workflows"
+"Bug Tracker" = "https://github.com/brainglobe/brainglobe-workflows/issues"
+"Documentation" = "https://brainglobe.info/documentation/brainglobe-workflows"
+"Homepage" = "https://brainglobe.info"
+"Source Code" = "https://github.com/brainglobe/brainglobe-workflows"
+
 [build-system]
 requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.2"]
 build-backend = "setuptools.build_meta"
 
 [tool.black]
-target-version = ['py38', 'py39', 'py310']
+target-version = ["py39", "py310"]
 skip-string-normalization = false
 line-length = 79
+
+[tool.cibuildwheel]
+build = "cp39-* cp310-*"
+
+[tool.cibuildwheel.macos]
+archs = ["x86_64", "arm64"]
 
 [tool.check-manifest]
 ignore = ["*.yaml"]
+
+[tool.pytest.ini_options]
+addopts = "--cov=brainglobe_workflows"
 
 [tool.ruff]
 line-length = 79
@@ -83,6 +96,24 @@ zip-safe = false
 
 [tool.setuptools.packages.find]
 include = ["brainglobe_workflows"]
-exclude = ["tests", "resources"]
+exclude = ["benchmarks", "tests", "resources"]
 
 [tool.setuptools_scm]
+
+[tool.tox]
+legacy_tox_ini = """
+[tox]
+envlist = py{39,310}
+isolated_build = True
+
+[gh-actions]
+python =
+    3.9: py39
+    3.10: py310
+
+[testenv]
+extras =
+    dev
+commands =
+    pytest -v --color=yes --cov=brainglobe_workflows --cov-report=xml
+"""
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_integration/__init__.py b/tests/test_integration/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_integration/conftest.py b/tests/test_integration/conftest.py
new file mode 100644
index 00000000..d9207917
--- /dev/null
+++ b/tests/test_integration/conftest.py
@@ -0,0 +1,290 @@
+import json
+from pathlib import Path
+from typing import Any
+
+import pooch
+import pytest
+
+from brainglobe_workflows.cellfinder.cellfinder_main import CellfinderConfig
+
+
+def make_config_dict_fetch_from_local(cellfinder_cache_dir: Path) -> dict:
+    """Generate a config dictionary with the required parameters
+    for the workflow
+
+    The input data is assumed to exist locally under cellfinder_cache_dir.
+    The results are saved in a timestamped output subdirectory under
+    cellfinder_cache_dir
+
+    Parameters
+    ----------
+    cellfinder_cache_dir : Path
+        Path to the directory where the downloaded input data will be unzipped,
+        and the output will be saved
+
+    Returns
+    -------
+    dict
+        dictionary with the required parameters for the workflow
+    """
+    return {
+        "install_path": cellfinder_cache_dir,
+        "extract_dir_relative": "cellfinder_test_data",  # relative path
+        "signal_subdir": "signal",
+        "background_subdir": "background",
+        "output_path_basename_relative": "cellfinder_output_",
+        "detected_cells_filename": "detected_cells.xml",
+        "voxel_sizes": [5, 2, 2],  # microns
+        "start_plane": 0,
+        "end_plane": -1,
+        "trained_model": None,  # if None, it will use a default model
+        "model_weights": None,
+        "model": "resnet50_tv",
+        "batch_size": 32,
+        "n_free_cpus": 2,
+        "network_voxel_sizes": [5, 1, 1],
+        "soma_diameter": 16,
+        "ball_xy_size": 6,
+        "ball_z_size": 15,
+        "ball_overlap_fraction": 0.6,
+        "log_sigma_size": 0.2,
+        "n_sds_above_mean_thresh": 10,
+        "soma_spread_factor": 1.4,
+        "max_cluster_size": 100000,
+        "cube_width": 50,
+        "cube_height": 50,
+        "cube_depth": 20,
+        "network_depth": "50",
+    }
+
+
+def make_config_dict_fetch_from_GIN(
+    cellfinder_cache_dir: Path,
+    data_url: str,
+    data_hash: str,
+) -> dict:
+    """Generate a config dictionary with the required parameters
+    for the workflow
+
+    The input data is fetched from GIN and downloaded to cellfinder_cache_dir.
+    The results are also saved in a timestamped output subdirectory under
+    cellfinder_cache_dir
+
+    Parameters
+    ----------
+    cellfinder_cache_dir : Path
+        Path to the directory where the downloaded input data will be unzipped,
+        and the output will be saved
+    data_url : str
+        URL to the GIN repository with the data to download
+    data_hash : str
+        Hash of the data to download
+
+    Returns
+    -------
+    dict
+        dictionary with the required parameters for the workflow
+    """
+    # start from the local-data config and add the GIN download details
+    config = make_config_dict_fetch_from_local(cellfinder_cache_dir)
+    config["data_url"] = data_url
+    config["data_hash"] = data_hash
+
+    return config
+
+
+def prep_json(obj: Any) -> Any:
+    """
+    Returns a JSON encodable version of the input object.
+
+    `Path` objects are converted to `str`; any other object is
+    passed to the `default` method of a `json.JSONEncoder`.
+    Suitable as the `default` argument of `json.dump`.
+
+    Parameters
+    ----------
+    obj : Any
+        object to convert into a JSON encodable form
+
+    Returns
+    -------
+    Any
+        JSON serializable version of input object
+    """
+    if isinstance(obj, Path):
+        return str(obj)
+    else:
+        json_decoder = json.JSONEncoder()
+        return json_decoder.default(obj)
+
+
+@pytest.fixture(autouse=True)
+def cellfinder_cache_dir(tmp_path: Path) -> Path:
+    """Return the path to a .cellfinder_workflows directory
+    under a temporary pytest directory. The directory itself
+    is not created by this fixture.
+
+    The temporary directory is available via pytest's tmp_path
+    fixture. A new temporary directory is created for every test
+    function (i.e., scope="function")
+
+    Parameters
+    ----------
+    tmp_path : Path
+        path to pytest-generated temporary directory
+
+    Returns
+    -------
+    Path
+        path to the .cellfinder_workflows cache directory
+    """
+
+    return Path(tmp_path) / ".cellfinder_workflows"
+
+
+@pytest.fixture(scope="session")
+def data_url() -> str:
+    """Return the URL to the zipped input data in the GIN repository
+
+    Returns
+    -------
+    str
+        URL to the zip file with the input data in the GIN repository
+    """
+    return "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip"
+
+
+@pytest.fixture(scope="session")
+def data_hash() -> str:
+    """Return the hash of the GIN input data
+
+    Returns
+    -------
+    str
+        Hash of the GIN input data
+    """
+    return "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914"
+
+
+@pytest.fixture(scope="session")
+def default_json_config_path() -> Path:
+    """Return the path to the json file with the default config parameters
+
+    Returns
+    -------
+    Path
+        path to the json file with the default config parameters, as
+        defined by DEFAULT_JSON_CONFIG_PATH in cellfinder_main
+    """
+    from brainglobe_workflows.cellfinder.cellfinder_main import (
+        DEFAULT_JSON_CONFIG_PATH,
+    )
+
+    return DEFAULT_JSON_CONFIG_PATH
+
+
+@pytest.fixture()
+def path_to_config_fetch_GIN(
+    tmp_path: Path, cellfinder_cache_dir: Path, data_url: str, data_hash: str
+) -> Path:
+    """Create an input config that fetches data from GIN and
+    return its path
+
+    Parameters
+    ----------
+    tmp_path : Path
+        path to a fresh pytest-generated temporary directory. The
+        generated config is saved here.
+
+    cellfinder_cache_dir : Path
+        path to the cellfinder cache directory, where the paths
+        in the config should point to.
+
+    data_url : str
+        URL to the GIN repository with the input data
+
+    data_hash : str
+        hash of the GIN input data
+
+    Returns
+    -------
+    input_config_path : Path
+        path to config file that fetches data from GIN
+    """
+    # create config dict
+    config_dict = make_config_dict_fetch_from_GIN(
+        cellfinder_cache_dir, data_url, data_hash
+    )
+
+    # define the path of the json file to dump the config data to
+    input_config_path = (
+        tmp_path / "input_config.json"
+    )  # save it in a temp dir separate from cellfinder_cache_dir
+
+    # save config data to json file
+    with open(input_config_path, "w") as js:
+        json.dump(config_dict, js, default=prep_json)
+
+    # check json file exists
+    assert Path(input_config_path).is_file()
+
+    return input_config_path
+
+
+@pytest.fixture()
+def path_to_config_fetch_local(
+    tmp_path: Path, cellfinder_cache_dir: Path, data_url: str, data_hash: str
+) -> Path:
+    """Create an input config that points to local data and
+    return its path.
+
+    The local data is downloaded from GIN, but no reference
+    to the GIN repository is included in the config.
+
+    Parameters
+    ----------
+    tmp_path : Path
+        path to a fresh pytest-generated temporary directory. The
+        generated config is saved here.
+
+    cellfinder_cache_dir : Path
+        path to the cellfinder cache directory, where the paths
+        in the config should point to.
+
+    data_url : str
+        URL to the GIN repository with the input data
+
+    data_hash : str
+        hash of the GIN input data
+
+    Returns
+    -------
+    input_config_path : Path
+        path to a config file that points to local data
+    """
+
+    # instantiate basic config (assumes data is local)
+    config_dict = make_config_dict_fetch_from_local(cellfinder_cache_dir)
+    config = CellfinderConfig(**config_dict)
+
+    # download GIN data to specified local directory
+    pooch.retrieve(
+        url=data_url,
+        known_hash=data_hash,
+        path=config.install_path,  # path to download zip to
+        progressbar=True,
+        processor=pooch.Unzip(
+            extract_dir=config.extract_dir_relative
+            # path to unzipped dir, *relative*  to 'path'
+        ),
+    )
+
+    # save config to json
+    input_config_path = tmp_path / "input_config.json"
+    with open(input_config_path, "w") as js:
+        json.dump(config_dict, js, default=prep_json)
+
+    # check json file exists
+    assert Path(input_config_path).is_file()
+
+    return input_config_path
diff --git a/tests/test_integration/test_cellfinder_workflow.py b/tests/test_integration/test_cellfinder_workflow.py
new file mode 100644
index 00000000..e55d0a46
--- /dev/null
+++ b/tests/test_integration/test_cellfinder_workflow.py
@@ -0,0 +1,211 @@
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+from brainglobe_workflows.cellfinder.cellfinder_main import CellfinderConfig
+
+
+def test_run_with_default_config(tmp_path, default_json_config_path):
+    """Test workflow run with no command line arguments
+
+    If no command line arguments are provided, the default
+    config at brainglobe_workflows/cellfinder/default_config.json
+    should be used.
+
+    After the workflow is run we check that:
+    - there are no errors (via returncode),
+    - the logs reflect the default config file was used, and
+    - a single output directory exists with the expected
+      output file inside it
+
+    Parameters
+    ----------
+    tmp_path : Path
+        path to a pytest-generated temporary directory.
+    default_json_config_path : Path
+        path to the json file with the default config parameters.
+    """
+    # run workflow with no CLI arguments and with cwd=tmp_path
+    subprocess_output = subprocess.run(
+        [
+            sys.executable,
+            Path(__file__).resolve().parents[2]
+            / "brainglobe_workflows"
+            / "cellfinder"
+            / "cellfinder_main.py",
+        ],
+        cwd=tmp_path,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        encoding="utf-8",
+    )
+
+    # check returncode
+    assert subprocess_output.returncode == 0
+
+    # check logs
+    assert "Using default config file" in subprocess_output.stdout
+
+    # Check one output directory exists and has expected
+    # output file inside it
+    assert_outputs(default_json_config_path, tmp_path)
+
+
+def test_run_with_GIN_data(
+    path_to_config_fetch_GIN,
+):
+    """Test workflow runs when passing a config that fetches data
+    from the GIN repository
+
+    After the workflow is run we check that:
+    - there are no errors (via returncode),
+    - the logs reflect the input config file was used,
+    - the logs reflect the data was downloaded from GIN, and
+    - a single output directory exists with the expected
+      output file inside it
+
+    Parameters
+    ----------
+    path_to_config_fetch_GIN : Path
+        path to the config file that fetches data from GIN.
+    """
+    # run workflow with CLI and capture log
+    subprocess_output = subprocess.run(
+        [
+            sys.executable,
+            Path(__file__).resolve().parents[2]
+            / "brainglobe_workflows"
+            / "cellfinder"
+            / "cellfinder_main.py",
+            "--config",
+            str(path_to_config_fetch_GIN),
+        ],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        encoding="utf-8",
+    )
+
+    # check returncode
+    assert subprocess_output.returncode == 0
+
+    # check logs
+    assert (
+        f"Input config read from {str(path_to_config_fetch_GIN)}"
+        in subprocess_output.stdout
+    )
+    assert (
+        "Fetching input data from the provided GIN repository"
+        in subprocess_output.stdout
+    )
+
+    # check one output directory exists and
+    # has expected output file inside it
+    assert_outputs(path_to_config_fetch_GIN)
+
+
+def test_run_with_local_data(
+    path_to_config_fetch_local,
+):
+    """Test workflow runs when passing a config that uses
+    local data
+
+    After the workflow is run we check that:
+    - there are no errors (via returncode),
+    - the logs reflect the input config file was used,
+    - the logs reflect the data was found locally, and
+    - a single output directory exists with the expected
+      output file inside it
+
+    Parameters
+    ----------
+    path_to_config_fetch_local : Path
+        path to the config file that points to local data.
+    """
+
+    # run workflow with CLI
+    subprocess_output = subprocess.run(
+        [
+            sys.executable,
+            Path(__file__).resolve().parents[2]
+            / "brainglobe_workflows"
+            / "cellfinder"
+            / "cellfinder_main.py",
+            "--config",
+            str(path_to_config_fetch_local),
+        ],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        encoding="utf-8",
+    )
+
+    # check returncode
+    assert subprocess_output.returncode == 0
+
+    # check logs
+    assert (
+        f"Input config read from {str(path_to_config_fetch_local)}"
+        in subprocess_output.stdout
+    )
+    assert (
+        "Fetching input data from the local directories"
+        in subprocess_output.stdout
+    )
+
+    # check one output directory exists and
+    # has expected output file inside it
+    assert_outputs(path_to_config_fetch_local)
+
+
+def assert_outputs(path_to_config, parent_dir_of_install_path=""):
+    """Helper function to determine whether the output is
+    as expected.
+
+    It checks that:
+     - a single output directory exists, and
+     - the expected output file exists inside it
+
+    Note that config.output_path is only defined after the workflow
+    setup is run, because its name is timestamped. Therefore, we search
+    for a directory based on config.output_path_basename_relative.
+
+    Parameters
+    ----------
+    path_to_config : Path
+        path to the input config used to generate the
+        output.
+
+    parent_dir_of_install_path : str or Path, optional
+        If the install_path in the input config is relative to the
+        directory the script is launched from (as is the case in the
+        default_config.json file), the absolute path to its parent_dir
+        must be specified here. If install_path is
+        absolute, this input is not required. By default "".
+    """
+
+    # load input config
+    with open(path_to_config) as config:
+        config_dict = json.load(config)
+    config = CellfinderConfig(**config_dict)
+
+    # check one output directory exists and
+    # it has expected output file inside it
+    output_path_without_timestamp = (
+        Path(parent_dir_of_install_path)
+        / config.install_path
+        / config.output_path_basename_relative
+    )
+    output_path_timestamped = [
+        x
+        for x in output_path_without_timestamp.parent.glob("*")
+        if x.is_dir() and x.name.startswith(output_path_without_timestamp.name)
+    ]
+
+    assert len(output_path_timestamped) == 1
+    assert (output_path_timestamped[0]).exists()
+    assert (
+        output_path_timestamped[0] / config.detected_cells_filename
+    ).is_file()
diff --git a/tests/test_unit/__init__.py b/tests/test_unit/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_unit/test_placeholder.py b/tests/test_unit/test_placeholder.py
new file mode 100644
index 00000000..3ada1ee4
--- /dev/null
+++ b/tests/test_unit/test_placeholder.py
@@ -0,0 +1,2 @@
+def test_placeholder():
+    assert True