diff --git a/MANIFEST.in b/MANIFEST.in
index c1b54649..34cf45e6 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,6 +3,7 @@ include README.md
 exclude .pre-commit-config.yaml
 
 recursive-include brainglobe_workflows *.py
+include brainglobe_workflows/cellfinder/default_config.json
 
 recursive-exclude * __pycache__
 recursive-exclude * *.py[co]
diff --git a/brainglobe_workflows/__init__.py b/brainglobe_workflows/__init__.py
index 28709be9..00081a03 100644
--- a/brainglobe_workflows/__init__.py
+++ b/brainglobe_workflows/__init__.py
@@ -1,7 +1,7 @@
 from importlib.metadata import PackageNotFoundError, version
 
 try:
-    __version__ = version("brainglobe-scripts")
+    __version__ = version("brainglobe-workflows")
 except PackageNotFoundError:
     # package is not installed
     pass
diff --git a/brainglobe_workflows/cellfinder/__init__.py b/brainglobe_workflows/cellfinder/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/brainglobe_workflows/cellfinder/cellfinder_main.py b/brainglobe_workflows/cellfinder/cellfinder_main.py
new file mode 100644
index 00000000..04ec7663
--- /dev/null
+++ b/brainglobe_workflows/cellfinder/cellfinder_main.py
@@ -0,0 +1,390 @@
+"""This script reproduces the most common cellfinder workflow
+
+It receives as an (optional) command line input the path to a configuration
+json file, that holds the values of the required parameters for the workflow.
+
+If no input json file is passed as a configuration, the default
+configuration defined at brainglobe_workflows/cellfinder/default_config.json
+is used.
+
+Example usage:
+ - to pass a custom configuration, run (from the cellfinder_main.py
+   parent directory):
+    python cellfinder_main.py --config path/to/input/config.json
+ - to use the default configuration, run
+    python cellfinder_main.py
+
+
+"""
+
+import argparse
+import datetime
+import json
+import logging
+import os
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional, Tuple, Union
+
+import pooch
+from brainglobe_utils.IO.cells import save_cells
+from cellfinder_core.main import main as cellfinder_run
+from cellfinder_core.tools.IO import read_with_dask
+from cellfinder_core.train.train_yml import depth_type
+
+Pathlike = Union[str, os.PathLike]
+
+DEFAULT_JSON_CONFIG_PATH = (
+    Path(__file__).resolve().parent / "default_config.json"
+)
+
+
+@dataclass
+class CellfinderConfig:
+    """
+    Define input and output data locations, and the parameters for
+    the cellfinder workflow. Typically built from a json config file.
+    """
+
+    # root directory where input data is cached and outputs are written
+    install_path: Pathlike
+
+    # relative subdirectory and file names, resolved under install_path
+    extract_dir_relative: Pathlike
+    signal_subdir: str
+    background_subdir: str
+    output_path_basename_relative: Pathlike
+    detected_cells_filename: Pathlike
+
+    # cellfinder parameters
+    voxel_sizes: Tuple[float, float, float]
+    start_plane: int
+    end_plane: int
+    trained_model: Optional[
+        os.PathLike
+    ]  # if None, it will use a default model
+    model_weights: Optional[os.PathLike]
+    model: str
+    batch_size: int
+    n_free_cpus: int
+    network_voxel_sizes: Tuple[int, int, int]
+    soma_diameter: int
+    ball_xy_size: int
+    ball_z_size: int
+    ball_overlap_fraction: float
+    log_sigma_size: float
+    n_sds_above_mean_thresh: int
+    soma_spread_factor: float
+    max_cluster_size: int
+    cube_width: int
+    cube_height: int
+    cube_depth: int
+    network_depth: depth_type
+
+    # origin of data to download (only used if not found locally)
+    data_url: Optional[str] = None
+    data_hash: Optional[str] = None
+
+    # The following attributes are normally left unset in the config
+    # file and filled in during the setup phase of the workflow
+    list_signal_files: Optional[list] = None
+    list_background_files: Optional[list] = None
+    output_path: Pathlike = ""
+    signal_dir_path: Pathlike = ""
+    background_dir_path: Pathlike = ""
+    detected_cells_path: Pathlike = ""
+
+
+def setup_logger() -> logging.Logger:
+    """Set up a logger that writes to stdout.
+
+    The logger is named after the module and its level is set to DEBUG.
+    The console handler keeps its default level, so it does not filter
+    further. Calling this again does not add a second console handler.
+
+    Returns
+    -------
+    logging.Logger
+        a logger object
+    """
+    logger = logging.getLogger(__name__)
+    logger.setLevel(logging.DEBUG)
+    # handlers accumulate on a named logger: attach the stdout handler
+    # only once, otherwise every message would be printed repeatedly
+    streams = [getattr(h, "stream", None) for h in logger.handlers]
+    if sys.stdout not in streams:
+        console_handler = logging.StreamHandler(sys.stdout)
+        console_handler.setFormatter(
+            logging.Formatter("%(name)s %(levelname)s: %(message)s")
+        )
+        logger.addHandler(console_handler)
+    return logger
+
+
+def run_workflow_from_cellfinder_run(config: CellfinderConfig):
+    """Run the workflow based on the cellfinder_core.main.main() function.
+
+    The steps are:
+    1. Read the input signal and background data as two Dask arrays.
+    2. Run the main cellfinder pipeline on the input Dask arrays, with
+       the parameters defined in the input configuration (config).
+    3. Save the detected cells as an xml file to the location specified
+       in the input configuration (config).
+
+    Parameters
+    ----------
+    config : CellfinderConfig
+        a dataclass with the data locations and the cellfinder parameters
+    """
+    signal_array = read_with_dask(config.signal_dir_path)
+    background_array = read_with_dask(config.background_dir_path)
+
+    # config fields named after the optional arguments of cellfinder_run;
+    # they are forwarded so that the values in the config take effect
+    forwarded = (
+        "start_plane", "end_plane", "trained_model", "model_weights",
+        "model", "batch_size", "n_free_cpus", "network_voxel_sizes",
+        "soma_diameter", "ball_xy_size", "ball_z_size",
+        "ball_overlap_fraction", "log_sigma_size", "n_sds_above_mean_thresh",
+        "soma_spread_factor", "max_cluster_size", "cube_width",
+        "cube_height", "cube_depth", "network_depth",
+    )
+    parameters = {name: getattr(config, name) for name in forwarded}
+    detected_cells = cellfinder_run(
+        signal_array, background_array, config.voxel_sizes, **parameters
+    )
+    save_cells(detected_cells, config.detected_cells_path)
+
+
+def setup_workflow(input_config_path: Path) -> CellfinderConfig:
+    """Run setup steps prior to executing the workflow
+
+    These setup steps include:
+    - instantiating a CellfinderConfig object with the required parameters,
+    - checking if the input data exists locally, and fetching from
+      GIN repository otherwise,
+    - adding the path to the input data files to the config, and
+    - creating a timestamped directory for the output of the workflow if
+      it doesn't exist and adding its path to the config
+
+    Parameters
+    ----------
+    input_config_path : Path
+        path to the input config file
+
+    Returns
+    -------
+    config : CellfinderConfig
+        a dataclass whose attributes are the parameters
+        for running cellfinder.
+
+    Raises
+    ------
+    FileNotFoundError
+        if the input config file does not exist
+    """
+    # Fetch the module logger here rather than relying on a global that is
+    # only created when this file is run as a script
+    logger = logging.getLogger(__name__)
+
+    # Check config file exists (asserts are stripped with python -O)
+    if not input_config_path.exists():
+        raise FileNotFoundError(f"Config not found: {input_config_path}")
+
+    # Instantiate a CellfinderConfig from the input json file
+    with open(input_config_path) as c:
+        config_dict = json.load(c)
+    config = CellfinderConfig(**config_dict)
+
+    # Print info logs for status
+    logger.info(f"Input config read from {input_config_path}")
+    if input_config_path.resolve() == DEFAULT_JSON_CONFIG_PATH:
+        logger.info("Using default config file")
+
+    # Retrieve and add lists of input data to the config,
+    # if these are not defined yet
+    if not (config.list_signal_files and config.list_background_files):
+        # build fullpaths to inputs
+        input_dir = Path(config.install_path) / config.extract_dir_relative
+        config.signal_dir_path = str(input_dir / config.signal_subdir)
+        config.background_dir_path = str(input_dir / config.background_subdir)
+        # retrieve data
+        config = retrieve_input_data(config)
+
+    # Create timestamped output directory if it doesn't exist
+    timestamp_formatted = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    output_path_timestamped = Path(config.install_path) / (
+        str(config.output_path_basename_relative) + timestamp_formatted
+    )
+    output_path_timestamped.mkdir(parents=True, exist_ok=True)
+
+    # Add output path and output file path to config
+    config.output_path = output_path_timestamped
+    config.detected_cells_path = (
+        config.output_path / config.detected_cells_filename
+    )
+
+    return config
+
+
+def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
+    """
+    Adds the lists of input data files (signal and background) to the config.
+
+    It first checks if the input data exists locally.
+    - If both directories (signal and background) exist, the lists of signal
+      and background files are added to the config.
+    - If exactly one of the input data directories is missing, an error
+      message is logged.
+    - If neither of them exist, the data is retrieved from the provided GIN
+      repository. If no URL or hash to GIN is provided, an error is logged.
+
+    When an error is logged, the config is returned without the file lists.
+
+    Parameters
+    ----------
+    config : CellfinderConfig
+        a dataclass whose attributes are the parameters
+        for running cellfinder.
+
+    Returns
+    -------
+    config : CellfinderConfig
+        a dataclass whose attributes are the parameters
+        for running cellfinder.
+
+    Raises
+    ------
+    FileNotFoundError
+        if an input directory is still missing after fetching the data
+    """
+    # Fetch the module logger here rather than relying on a global that is
+    # only created when this file is run as a script
+    logger = logging.getLogger(__name__)
+
+    signal_dir = Path(config.signal_dir_path)
+    background_dir = Path(config.background_dir_path)
+    signal_exists = signal_dir.exists()
+    background_exists = background_dir.exists()
+
+    # Check if input data (signal and background) exist locally.
+    # If both directories exist, get list of signal and background files
+    if signal_exists and background_exists:
+        logger.info("Fetching input data from the local directories")
+        config.list_signal_files = [
+            f for f in signal_dir.resolve().iterdir() if f.is_file()
+        ]
+        config.list_background_files = [
+            f for f in background_dir.resolve().iterdir() if f.is_file()
+        ]
+
+    # If exactly one of the input data directories is missing, print error
+    elif signal_exists or background_exists:
+        missing_dir = (
+            config.background_dir_path
+            if signal_exists
+            else config.signal_dir_path
+        )
+        logger.error(f"The directory {missing_dir} does not exist")
+
+    # If neither of them exist and the GIN URL or hash are not defined,
+    # there is nowhere to fetch the data from: print error
+    elif (not config.data_url) or (not config.data_hash):
+        logger.error(
+            "Input data not found locally, and URL/hash to "
+            "GIN repository not provided"
+        )
+
+    # Otherwise, retrieve data from GIN repository
+    else:
+        # get list of files in GIN archive with pooch.retrieve
+        list_files_archive = pooch.retrieve(
+            url=config.data_url,
+            known_hash=config.data_hash,
+            path=config.install_path,  # zip will be downloaded here
+            progressbar=True,
+            processor=pooch.Unzip(
+                extract_dir=config.extract_dir_relative
+                # path to unzipped dir,
+                # *relative* to the path set in 'path'
+            ),
+        )
+        logger.info("Fetching input data from the provided GIN repository")
+
+        # Check signal and background parent directories exist now
+        # (raised explicitly, since asserts are stripped with python -O)
+        for input_dir in (signal_dir, background_dir):
+            if not input_dir.exists():
+                raise FileNotFoundError(
+                    f"The directory {input_dir} does not exist"
+                )
+
+        # Add to the config the archive files under each input directory
+        # (the directories are resolved here, once they exist)
+        signal_prefix = str(signal_dir.resolve())
+        background_prefix = str(background_dir.resolve())
+        config.list_signal_files = [
+            f for f in list_files_archive if f.startswith(signal_prefix)
+        ]
+        config.list_background_files = [
+            f for f in list_files_archive if f.startswith(background_prefix)
+        ]
+
+    return config
+
+
+def parse_cli_arguments() -> argparse.Namespace:
+    """Define argument parser for cellfinder workflow script.
+
+    It expects a path to a json file with the workflow parameters. If none
+    is provided, the default config (DEFAULT_JSON_CONFIG_PATH) is used.
+
+    Returns
+    -------
+    args : argparse.Namespace
+        command line input arguments parsed
+    """
+    # initialise argument parser
+    parser = argparse.ArgumentParser(
+        description=(
+            "To launch the workflow with "
+            "a desired set of input parameters, run:"
+            " `python cellfinder_main.py --config path/to/input/config.json` "
+            "where path/to/input/config.json is the json file "
+            "containing the workflow parameters."
+        )
+    )
+    # add arguments
+    parser.add_argument(
+        "-c",
+        "--config",
+        default=str(DEFAULT_JSON_CONFIG_PATH),
+        type=str,
+        metavar="CONFIG",  # a name for usage messages
+        help="path to the json config file (default: %(default)s)",
+    )
+
+    # build parser object
+    args = parser.parse_args()
+
+    # print error if the config path is empty (e.g. `--config ""`); the
+    # logger is fetched by name, as no global one exists when imported
+    if not args.config:
+        logger = logging.getLogger(__name__)
+        logger.error("Paths to input config not provided.")
+        parser.print_help()
+
+    return args
+
+
+if __name__ == "__main__":
+    # set up the logger first: the functions below log through it
+    logger = setup_logger()
+
+    # parse command line arguments
+    args = parse_cli_arguments()
+
+    # run workflow (setup first, then the cellfinder run itself)
+    config = setup_workflow(Path(args.config))
+    run_workflow_from_cellfinder_run(config)  # only this will be benchmarked
diff --git a/brainglobe_workflows/cellfinder/default_config.json b/brainglobe_workflows/cellfinder/default_config.json
new file mode 100644
index 00000000..a80a4ba4
--- /dev/null
+++ b/brainglobe_workflows/cellfinder/default_config.json
@@ -0,0 +1,39 @@
+{
+  "install_path": ".cellfinder_workflows",
+  "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
+  "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",
+  "extract_dir_relative": "cellfinder_test_data",
+  "signal_subdir": "signal",
+  "background_subdir": "background",
+  "output_path_basename_relative": "cellfinder_output_",
+  "detected_cells_filename": "detected_cells.xml",
+  "voxel_sizes": [
+    5,
+    2,
+    2
+  ],
+  "start_plane": 0,
+  "end_plane": -1,
+  "trained_model": null,
+  "model_weights": null,
+  "model": "resnet50_tv",
+  "batch_size": 32,
+  "n_free_cpus": 2,
+  "network_voxel_sizes": [
+    5,
+    1,
+    1
+  ],
+  "soma_diameter": 16,
+  "ball_xy_size": 6,
+  "ball_z_size": 15,
+  "ball_overlap_fraction": 0.6,
+  "log_sigma_size": 0.2,
+  "n_sds_above_mean_thresh": 10,
+  "soma_spread_factor": 1.4,
+  "max_cluster_size": 100000,
+  "cube_width": 50,
+  "cube_height": 50,
+  "cube_depth": 20,
+  "network_depth": "50"
+}
diff --git a/pyproject.toml b/pyproject.toml
index ec5c3257..c78a6bb6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,10 @@ description = "A place to keep scripts to use as benchmarks, user-examples end-t
 readme = "README.md"
 requires-python = ">=3.8.0"
 dynamic = ["version"]
-
+dependencies = [
+    "pooch",
+    "cellfinder-core"
+]
 license = {text = "BSD-3-Clause"}
 
 classifiers = [
@@ -51,11 +54,11 @@ build-backend = "setuptools.build_meta"
 include-package-data = true
 
 [tool.setuptools.packages.find]
-include = ["brainglobe_scripts*"]
+include = ["brainglobe_workflows*"]
 exclude = ["tests*"]
 
 [tool.pytest.ini_options]
-addopts = "--cov=brainglobe_scripts"
+addopts = "--cov=brainglobe_workflows"
 
 [tool.black]
 target-version = ['py38', 'py39', 'py310']
@@ -101,5 +104,5 @@ python =
 extras =
     dev
 commands =
-    pytest -v --color=yes --cov=brainglobe_scripts --cov-report=xml
+    pytest -v --color=yes --cov=brainglobe_workflows --cov-report=xml
 """
diff --git a/tests/test_integration/conftest.py b/tests/test_integration/conftest.py
new file mode 100644
index 00000000..d9207917
--- /dev/null
+++ b/tests/test_integration/conftest.py
@@ -0,0 +1,290 @@
+import json
+from pathlib import Path
+from typing import Any
+
+import pooch
+import pytest
+
+from brainglobe_workflows.cellfinder.cellfinder_main import CellfinderConfig
+
+
+def make_config_dict_fetch_from_local(cellfinder_cache_dir: Path) -> dict:
+    """Build a config dictionary for a workflow that uses local input data.
+
+    The input data is assumed to exist locally under cellfinder_cache_dir;
+    the results are saved in a timestamped subdirectory of that directory.
+
+    Parameters
+    ----------
+    cellfinder_cache_dir : Path
+        directory holding the input data, where the output is also saved
+
+    Returns
+    -------
+    dict
+        dictionary with the required parameters for the workflow
+    """
+    input_output = {
+        "install_path": cellfinder_cache_dir,
+        "extract_dir_relative": "cellfinder_test_data",  # relative path
+        "signal_subdir": "signal",
+        "background_subdir": "background",
+        "output_path_basename_relative": "cellfinder_output_",
+        "detected_cells_filename": "detected_cells.xml",
+    }
+    cellfinder_parameters = {
+        "voxel_sizes": [5, 2, 2],  # microns
+        "start_plane": 0,
+        "end_plane": -1,
+        "trained_model": None,  # if None, it will use a default model
+        "model_weights": None,
+        "model": "resnet50_tv",
+        "batch_size": 32,
+        "n_free_cpus": 2,
+        "network_voxel_sizes": [5, 1, 1],
+        "soma_diameter": 16,
+        "ball_xy_size": 6,
+        "ball_z_size": 15,
+        "ball_overlap_fraction": 0.6,
+        "log_sigma_size": 0.2,
+        "n_sds_above_mean_thresh": 10,
+        "soma_spread_factor": 1.4,
+        "max_cluster_size": 100000,
+        "cube_width": 50,
+        "cube_height": 50,
+        "cube_depth": 20,
+        "network_depth": "50",
+    }
+    return {**input_output, **cellfinder_parameters}
+
+
+def make_config_dict_fetch_from_GIN(
+    cellfinder_cache_dir: Path,
+    data_url: str,
+    data_hash: str,
+) -> dict:
+    """Build a config dictionary for a workflow that fetches its
+    input data from GIN
+
+    The input data is fetched from GIN and downloaded to cellfinder_cache_dir.
+    The results are also saved in a timestamped output subdirectory under
+    cellfinder_cache_dir. The dictionary holds the same parameters as the
+    one for local data, plus the URL and the hash of the data.
+
+    Parameters
+    ----------
+    cellfinder_cache_dir : Path
+        Path to the directory where the downloaded input data will be unzipped,
+        and the output will be saved
+    data_url: str
+        URL to the GIN repository with the data to download
+    data_hash: str
+        Hash of the data to download
+
+    Returns
+    -------
+    dict
+        dictionary with the required parameters for the workflow
+    """
+    return {
+        **make_config_dict_fetch_from_local(cellfinder_cache_dir),
+        "data_url": data_url,
+        "data_hash": data_hash,
+    }
+
+
+def prep_json(obj: Any) -> Any:
+    """
+    Return a JSON encodable version of the input object.
+
+    `Path` objects are converted to strings. Any other object is
+    handed over to the default method of the JSON encoder, which
+    raises a TypeError.
+
+    Parameters
+    ----------
+    obj : Any
+        object that is not JSON serializable out of the box
+
+    Returns
+    -------
+    Any
+        JSON serializable version of input object
+    """
+    if not isinstance(obj, Path):
+        # JSONEncoder.default() raises a TypeError for any object
+        return json.JSONEncoder().default(obj)
+
+    return str(obj)
+
+
+@pytest.fixture(autouse=True)
+def cellfinder_cache_dir(tmp_path: Path) -> Path:
+    """Return the path to a .cellfinder_workflows directory
+    under a temporary pytest directory.
+
+    The directory itself is not created by this fixture. The temporary
+    parent directory is available via pytest's tmp_path fixture, and a
+    new one is provided for every test function
+    (i.e., scope="function")
+
+    Parameters
+    ----------
+    tmp_path : Path
+        path to pytest-generated temporary directory
+
+    Returns
+    -------
+    Path
+        path to the cellfinder_workflows cache directory
+    """
+    cache_dir = tmp_path / ".cellfinder_workflows"
+    return cache_dir
+
+
+@pytest.fixture(scope="session")
+def data_url() -> str:
+    """Return the URL of the zip archive with the input data on GIN
+
+    Returns
+    -------
+    str
+        URL of the zipped input data (same value as in default_config.json)
+    """
+    return "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip"
+
+
+@pytest.fixture(scope="session")
+def data_hash() -> str:
+    """Return the hash of the GIN input data, passed to pooch as known_hash
+
+    Returns
+    -------
+    str
+        Hash of the GIN input data (same value as in default_config.json)
+    """
+    return "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914"
+
+
+@pytest.fixture(scope="session")
+def default_json_config_path() -> Path:
+    """Return the path to the json file with the default config
+    parameters, as defined by DEFAULT_JSON_CONFIG_PATH in cellfinder_main
+
+    Returns
+    -------
+    Path
+        path to the json file with the default config parameters
+    """
+    from brainglobe_workflows.cellfinder.cellfinder_main import (
+        DEFAULT_JSON_CONFIG_PATH,
+    )
+
+    return DEFAULT_JSON_CONFIG_PATH
+
+
+@pytest.fixture()
+def path_to_config_fetch_GIN(
+    tmp_path: Path, cellfinder_cache_dir: Path, data_url: str, data_hash: str
+) -> Path:
+    """Write an input config that fetches data from GIN to a json
+    file and return the path to that file
+
+    Parameters
+    ----------
+    tmp_path : Path
+        path to a fresh pytest-generated temporary directory. The
+        generated config is saved here.
+
+    cellfinder_cache_dir : Path
+        path to the cellfinder cache directory, where the paths
+        in the config should point to.
+
+    data_url: str
+        URL to the GIN repository with the input data
+
+    data_hash: str
+        hash to the GIN input data
+
+    Returns
+    -------
+    input_config_path : Path
+        path to config file that fetches data from GIN
+    """
+
+    # the config file is saved directly under tmp_path
+    input_config_path = tmp_path / "input_config.json"
+
+    # serialise the config dict to json; Path values are
+    # converted to strings by prep_json
+    config_as_json = json.dumps(
+        make_config_dict_fetch_from_GIN(
+            cellfinder_cache_dir, data_url, data_hash
+        ),
+        default=prep_json,
+    )
+    input_config_path.write_text(config_as_json)
+
+    # check json file exists
+    assert input_config_path.is_file()
+
+    return input_config_path
+
+
+@pytest.fixture()
+def path_to_config_fetch_local(
+    tmp_path: Path, cellfinder_cache_dir: Path, data_url: str, data_hash: str
+) -> Path:
+    """Write an input config that points to local data to a json
+    file and return the path to that file.
+
+    The local data is downloaded from GIN, but no reference
+    to the GIN repository is included in the config.
+
+    Parameters
+    ----------
+    tmp_path : Path
+        path to a fresh pytest-generated temporary directory. The
+        generated config is saved here.
+
+    cellfinder_cache_dir : Path
+        path to the cellfinder cache directory, where the paths
+        in the config should point to.
+
+    data_url: str
+        URL to the GIN repository with the input data
+
+    data_hash: str
+        hash to the GIN input data
+
+    Returns
+    -------
+    input_config_path : Path
+        path to a config file that points to local data
+    """
+    # instantiate basic config (assumes data is local)
+    config_dict = make_config_dict_fetch_from_local(cellfinder_cache_dir)
+    config = CellfinderConfig(**config_dict)
+
+    # download and unzip the GIN data into the local directory the
+    # config points to, so that the workflow finds it there
+    unzip_to_extract_dir = pooch.Unzip(
+        extract_dir=config.extract_dir_relative
+        # path to unzipped dir, *relative* to 'path'
+    )
+    pooch.retrieve(
+        url=data_url,
+        known_hash=data_hash,
+        path=config.install_path,  # path to download zip to
+        progressbar=True,
+        processor=unzip_to_extract_dir,
+    )
+
+    # save config to json
+    input_config_path = tmp_path / "input_config.json"
+    input_config_path.write_text(json.dumps(config_dict, default=prep_json))
+
+    # check json file exists
+    assert input_config_path.is_file()
+
+    return input_config_path
diff --git a/tests/test_integration/test_cellfinder_workflow.py b/tests/test_integration/test_cellfinder_workflow.py
new file mode 100644
index 00000000..e55d0a46
--- /dev/null
+++ b/tests/test_integration/test_cellfinder_workflow.py
@@ -0,0 +1,211 @@
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+from brainglobe_workflows.cellfinder.cellfinder_main import CellfinderConfig
+
+
+def test_run_with_default_config(tmp_path, default_json_config_path):
+    """Test workflow run with no command line arguments
+
+    If no command line arguments are provided, the default config at
+    brainglobe_workflows/cellfinder/default_config.json should be used.
+
+    After the workflow is run we check that:
+    - there are no errors (via returncode),
+    - the logs reflect the default config file was used, and
+    - a single output directory exists with the expected file inside it
+
+    Parameters
+    ----------
+    tmp_path : Path
+        path to a pytest-generated temporary directory.
+    default_json_config_path : Path
+        path to the json file with the default config parameters
+    """
+    # path to the workflow script, from the root of the repository
+    script_path = (
+        Path(__file__).resolve().parents[2]
+        / "brainglobe_workflows"
+        / "cellfinder"
+        / "cellfinder_main.py"
+    )
+
+    # run workflow with no CLI arguments, with cwd=tmp_path
+    subprocess_output = subprocess.run(
+        [sys.executable, script_path],
+        cwd=tmp_path,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        encoding="utf-8",
+    )
+
+    # check returncode
+    assert subprocess_output.returncode == 0
+
+    # check logs
+    assert "Using default config file" in subprocess_output.stdout
+
+    # Check one output directory exists and has expected
+    # output file inside it
+    assert_outputs(default_json_config_path, tmp_path)
+
+
+def test_run_with_GIN_data(
+    path_to_config_fetch_GIN,
+):
+    """Test workflow runs when passing a config that fetches data
+    from the GIN repository
+
+    The workflow script is launched in a subprocess.
+
+    After the workflow is run we check that:
+    - there are no errors (via returncode),
+    - the logs reflect the input config file was used,
+    - the logs reflect the data was downloaded from GIN, and
+    - a single output directory exists with the expected
+      output file inside it
+
+    Parameters
+    ----------
+    path_to_config_fetch_GIN : Path
+        path to the input config file, generated in a pytest temporary
+        directory, that fetches the input data from GIN
+    """
+    # path to the workflow script, from the root of the repository
+    script_path = (
+        Path(__file__).resolve().parents[2]
+        / "brainglobe_workflows"
+        / "cellfinder"
+        / "cellfinder_main.py"
+    )
+
+    # run workflow with CLI and capture log
+    command = [sys.executable, script_path]
+    command += ["--config", str(path_to_config_fetch_GIN)]
+    subprocess_output = subprocess.run(
+        command,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        encoding="utf-8",
+    )
+    logs = subprocess_output.stdout
+
+    # check returncode
+    assert subprocess_output.returncode == 0
+
+    # check logs
+    assert f"Input config read from {path_to_config_fetch_GIN}" in logs
+    assert "Fetching input data from the provided GIN repository" in logs
+
+    # check one output directory exists and
+    # has expected output file inside it
+    assert_outputs(path_to_config_fetch_GIN)
+
+
+def test_run_with_local_data(
+    path_to_config_fetch_local,
+):
+    """Test workflow runs when passing a config that uses
+    local data
+
+    The workflow script is launched in a subprocess.
+
+    After the workflow is run we check that:
+    - there are no errors (via returncode),
+    - the logs reflect the input config file was used,
+    - the logs reflect the data was found locally, and
+    - a single output directory exists with the expected
+      output file inside it
+
+    Parameters
+    ----------
+    path_to_config_fetch_local : Path
+        path to the input config file, generated in a pytest temporary
+        directory, that points to local input data
+    """
+
+    # path to the workflow script, from the root of the repository
+    script_path = (
+        Path(__file__).resolve().parents[2]
+        / "brainglobe_workflows"
+        / "cellfinder"
+        / "cellfinder_main.py"
+    )
+
+    # run workflow with CLI
+    command = [sys.executable, script_path]
+    command += ["--config", str(path_to_config_fetch_local)]
+    subprocess_output = subprocess.run(
+        command,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        encoding="utf-8",
+    )
+    logs = subprocess_output.stdout
+
+    # check returncode
+    assert subprocess_output.returncode == 0
+
+    # check logs
+    assert f"Input config read from {path_to_config_fetch_local}" in logs
+    assert "Fetching input data from the local directories" in logs
+
+    # check one output directory exists and
+    # has expected output file inside it
+    assert_outputs(path_to_config_fetch_local)
+
+
+def assert_outputs(path_to_config, parent_dir_of_install_path=""):
+    """Helper function to determine whether the output is as expected.
+
+    It checks that:
+     - a single output directory exists, and
+     - the expected output file exists inside it
+
+    Note that config.output_path is only defined after the workflow
+    setup is run, because its name is timestamped. Therefore,
+    we search for an output directory based on
+    config.output_path_basename_relative.
+
+    Parameters
+    ----------
+    path_to_config : Path
+        path to the input config used to generate the
+        output.
+
+    parent_dir_of_install_path : str, optional
+        If the install_path in the input config is relative to the
+        directory the script is launched from (as is the case in the
+        default_config.json file), the absolute path to its parent_dir
+        must be specified here. If the path to install_path is
+        absolute, this input is not required. By default "".
+    """
+    # load input config
+    with open(path_to_config) as config_file:
+        config = CellfinderConfig(**json.load(config_file))
+
+    # path to the output directory, without the timestamp suffix
+    basename_path = (
+        Path(parent_dir_of_install_path)
+        / config.install_path
+        / config.output_path_basename_relative
+    )
+
+    # directories next to it whose name starts with the output basename
+    output_dirs = [
+        entry
+        for entry in basename_path.parent.glob("*")
+        if entry.is_dir() and entry.name.startswith(basename_path.name)
+    ]
+
+    # check one output directory exists and
+    # it has expected output file inside it
+    assert len(output_dirs) == 1
+    output_dir = output_dirs[0]
+    assert output_dir.exists()
+    assert (output_dir / config.detected_cells_filename).is_file()