From 4176c3b1b33802f345dbae8b685836b43dd915e0 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 16 Oct 2023 11:51:53 +0100
Subject: [PATCH 01/54] proto benchmarking smoke test (WIP)

---
 .../test_cellfinder_benchmarks.py             | 75 +++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 tests/test_integration/test_cellfinder_benchmarks.py

diff --git a/tests/test_integration/test_cellfinder_benchmarks.py b/tests/test_integration/test_cellfinder_benchmarks.py
new file mode 100644
index 00000000..708a3635
--- /dev/null
+++ b/tests/test_integration/test_cellfinder_benchmarks.py
@@ -0,0 +1,75 @@
+import json
+import subprocess
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture()
+def asv_config_monkeypatched_path(tmp_path):
+    """Create a monkeypatched asv.conf.json file
+    in tmp_path and return its path
+
+    Parameters
+    ----------
+    tmp_path : Path
+        path to pytest-generated temporary directory
+
+
+    Returns
+    -------
+    _type_
+        _description_
+    """
+    # read reference asv config
+    asv_original_path = Path(__file__).resolve().parents[2] / "asv.conf.json"
+    with open(asv_original_path) as asv_config:
+        asv_monkeypatched_dict = json.load(asv_config)
+
+    # change directories
+    for ky in ["env_dir", "results_dir", "html_dir"]:
+        asv_monkeypatched_dict[ky] = str(
+            Path(tmp_path) / asv_monkeypatched_dict[ky]
+        )
+
+    # define path to a temp json file to dump config data
+    asv_monkeypatched_path = tmp_path / "asv.conf.json"
+
+    # save monkeypatched config data to json file
+    with open(asv_monkeypatched_path, "w") as js:
+        json.dump(asv_monkeypatched_dict, js)
+
+    # check json file exists
+    assert asv_monkeypatched_path.is_file()
+
+    return str(asv_monkeypatched_path)
+
+
+def test_run_benchmarks(asv_config_monkeypatched_path):
+    # --- ideally monkeypatch an asv config so that results are in tmp_dir?
+
+    # set up machine (env_dir, results_dir, html_dir)
+    subprocess.run(["asv", "machine", "--yes"])
+
+    # run benchmarks
+    subprocess_output = subprocess.run(
+        [
+            "asv",
+            "run",
+            "--config",  # --- can I pass a dict?
+            asv_config_monkeypatched_path,
+            # "--dry-run" # Do not save any results to disk. for now!
+        ],
+        # cwd=tmp_path, ---> from where asv config is
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        encoding="utf-8",
+    )
+
+    # check returncode
+    assert subprocess_output.returncode == 0
+
+    # check logs?
+
+    # delete directories?

From a9f8f11f5c3cf515e877424132a026908f717d3c Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 16 Oct 2023 12:27:06 +0100
Subject: [PATCH 02/54] added git URL to monkeypatched asv but not fully
 working yet (WIP)

---
 .../test_cellfinder_benchmarks.py             | 43 +++++++++++++++----
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/tests/test_integration/test_cellfinder_benchmarks.py b/tests/test_integration/test_cellfinder_benchmarks.py
index 708a3635..ab204764 100644
--- a/tests/test_integration/test_cellfinder_benchmarks.py
+++ b/tests/test_integration/test_cellfinder_benchmarks.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 
 import pytest
+from asv import util
 
 
 @pytest.fixture()
@@ -23,8 +24,11 @@ def asv_config_monkeypatched_path(tmp_path):
     """
     # read reference asv config
     asv_original_path = Path(__file__).resolve().parents[2] / "asv.conf.json"
-    with open(asv_original_path) as asv_config:
-        asv_monkeypatched_dict = json.load(asv_config)
+    asv_monkeypatched_dict = util.load_json(
+        asv_original_path, js_comments=True
+    )
+    # with open(asv_original_path) as asv_config:
+    #     asv_monkeypatched_dict = json.load(asv_config)
 
     # change directories
     for ky in ["env_dir", "results_dir", "html_dir"]:
@@ -32,6 +36,14 @@ def asv_config_monkeypatched_path(tmp_path):
             Path(tmp_path) / asv_monkeypatched_dict[ky]
         )
 
+    # change repo to URL rather than local
+    asv_monkeypatched_dict[
+        "repo"
+    ] = "https://github.com/brainglobe/brainglobe-workflows/"
+    asv_monkeypatched_dict[
+        "dvcs"
+    ] = "git"  # not sure why I need this with the URL?
+
     # define path to a temp json file to dump config data
     asv_monkeypatched_path = tmp_path / "asv.conf.json"
 
@@ -49,26 +61,39 @@ def test_run_benchmarks(asv_config_monkeypatched_path):
     # --- ideally monkeypatch an asv config so that results are in tmp_dir?
 
     # set up machine (env_dir, results_dir, html_dir)
-    subprocess.run(["asv", "machine", "--yes"])
+    asv_machine_output = subprocess.run(
+        [
+            "asv",
+            "machine",
+            "--yes",
+            "--config",
+            asv_config_monkeypatched_path,
+        ]
+    )
+    assert asv_machine_output.returncode == 0
 
     # run benchmarks
-    subprocess_output = subprocess.run(
+    asv_benchmark_output = subprocess.run(
         [
             "asv",
             "run",
-            "--config",  # --- can I pass a dict?
+            "--config",
             asv_config_monkeypatched_path,
-            # "--dry-run" # Do not save any results to disk. for now!
+            # "--dry-run"
+            # # Do not save any results to disk? not truly testing then
         ],
-        # cwd=tmp_path, ---> from where asv config is
+        cwd=str(
+            Path(asv_config_monkeypatched_path).parent
+        ),  # ---> from where asv config is
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         text=True,
         encoding="utf-8",
-    )
+    )  # STDOUT: "· Cloning project\n· Fetching recent changes\n·
+    # Creating environments\n· No __init__.py file in 'benchmarks'\n"?
 
     # check returncode
-    assert subprocess_output.returncode == 0
+    assert asv_benchmark_output.returncode == 0
 
     # check logs?
 

From 8ef7e571f44af964a751ee0bb4a03b6550a72c01 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 1 Dec 2023 14:54:52 +0000
Subject: [PATCH 03/54] rename workflows directory

---
 MANIFEST.in                                               | 8 ++++----
 {brainglobe_workflows => workflows}/__init__.py           | 0
 .../cellfinder/__init__.py                                | 0
 .../cellfinder/cellfinder_main.py                         | 0
 .../cellfinder/default_config.json                        | 0
 5 files changed, 4 insertions(+), 4 deletions(-)
 rename {brainglobe_workflows => workflows}/__init__.py (100%)
 rename {brainglobe_workflows => workflows}/cellfinder/__init__.py (100%)
 rename {brainglobe_workflows => workflows}/cellfinder/cellfinder_main.py (100%)
 rename {brainglobe_workflows => workflows}/cellfinder/default_config.json (100%)

diff --git a/MANIFEST.in b/MANIFEST.in
index 27538bc8..b47713ce 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,15 +2,15 @@ include LICENSE
 include README.md
 exclude .pre-commit-config.yaml
 
-recursive-include brainglobe_workflows *.py
-include brainglobe_workflows/cellfinder/default_config.json
+recursive-include workflows *.py
+recursive-include workflows *.json
 
 recursive-exclude * __pycache__
 recursive-exclude * *.py[co]
 recursive-exclude docs *
 recursive-exclude tests *
 
-include *.json
-recursive-include benchmarks *.json
+include asv.conf.json
 recursive-include benchmarks *.py
+recursive-include benchmarks *.json
 recursive-exclude benchmarks/results *
diff --git a/brainglobe_workflows/__init__.py b/workflows/__init__.py
similarity index 100%
rename from brainglobe_workflows/__init__.py
rename to workflows/__init__.py
diff --git a/brainglobe_workflows/cellfinder/__init__.py b/workflows/cellfinder/__init__.py
similarity index 100%
rename from brainglobe_workflows/cellfinder/__init__.py
rename to workflows/cellfinder/__init__.py
diff --git a/brainglobe_workflows/cellfinder/cellfinder_main.py b/workflows/cellfinder/cellfinder_main.py
similarity index 100%
rename from brainglobe_workflows/cellfinder/cellfinder_main.py
rename to workflows/cellfinder/cellfinder_main.py
diff --git a/brainglobe_workflows/cellfinder/default_config.json b/workflows/cellfinder/default_config.json
similarity index 100%
rename from brainglobe_workflows/cellfinder/default_config.json
rename to workflows/cellfinder/default_config.json

From ff2bbf175c111397fe8e30b9896baf657cc8950c Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 1 Dec 2023 19:22:09 +0000
Subject: [PATCH 04/54] rename and reorganise default configs and cellfinder
 main workflow script

---
 .../cellfinder_main.py => cellfinder.py}      | 233 ++++++------------
 workflows/cellfinder/__init__.py              |   0
 .../cellfinder.json}                          |   0
 workflows/utils.py                            |  92 +++++++
 4 files changed, 172 insertions(+), 153 deletions(-)
 rename workflows/{cellfinder/cellfinder_main.py => cellfinder.py} (80%)
 delete mode 100644 workflows/cellfinder/__init__.py
 rename workflows/{cellfinder/default_config.json => configs/cellfinder.json} (100%)
 create mode 100644 workflows/utils.py

diff --git a/workflows/cellfinder/cellfinder_main.py b/workflows/cellfinder.py
similarity index 80%
rename from workflows/cellfinder/cellfinder_main.py
rename to workflows/cellfinder.py
index fd19db34..e022aae5 100644
--- a/workflows/cellfinder/cellfinder_main.py
+++ b/workflows/cellfinder.py
@@ -17,10 +17,9 @@
 
 """
 
-import argparse
+
 import datetime
 import json
-import logging
 import os
 import sys
 from dataclasses import dataclass
@@ -35,10 +34,6 @@
 
 Pathlike = Union[str, os.PathLike]
 
-DEFAULT_JSON_CONFIG_PATH = (
-    Path(__file__).resolve().parent / "default_config.json"
-)
-
 
 @dataclass
 class CellfinderConfig:
@@ -97,146 +92,6 @@ class CellfinderConfig:
 
 
 def setup(argv=None) -> CellfinderConfig:
-    def parse_cli_arguments(argv_) -> argparse.Namespace:
-        """Define argument parser for cellfinder
-        workflow script.
-
-        It expects a path to a json file with the
-        parameters required to run the workflow.
-        If none is provided, the default
-
-        Returns
-        -------
-        args : argparse.Namespace
-            command line input arguments parsed
-        """
-        # initialise argument parser
-        parser = argparse.ArgumentParser(
-            description=(
-                "To launch the workflow with "
-                "a specific set of input parameters, run: "
-                "`python cellfinder_main.py --config path/to/config.json`"
-                "where path/to/input/config.json is the json file "
-                "containing the workflow parameters."
-            )
-        )
-        # add arguments
-        parser.add_argument(
-            "-c",
-            "--config",
-            default=str(DEFAULT_JSON_CONFIG_PATH),
-            type=str,
-            metavar="CONFIG",  # a name for usage messages
-            help="",
-        )
-
-        # build parser object
-        args = parser.parse_args(argv_)
-
-        # print error if required arguments not provided
-        if not args.config:
-            logger.error("Paths to input config not provided.")
-            parser.print_help()
-
-        return args
-
-    def setup_logger() -> logging.Logger:
-        """Setup a logger for this script
-
-        The logger's level is set to DEBUG, and it
-        is linked to a handler that writes to the
-        console and whose level is
-
-        Returns
-        -------
-        logging.Logger
-            a logger object
-        """
-        # define handler that writes to stdout
-        console_handler = logging.StreamHandler(sys.stdout)
-        console_format = logging.Formatter(
-            "%(name)s %(levelname)s: %(message)s"
-        )
-        console_handler.setFormatter(console_format)
-
-        # define logger and link to handler
-        logger = logging.getLogger(
-            __name__
-        )  # if imported as a module, the logger is named after the module
-        logger.setLevel(logging.DEBUG)
-        logger.addHandler(console_handler)
-        return logger
-
-    def setup_workflow(input_config_path: Path) -> CellfinderConfig:
-        """Run setup steps prior to executing the workflow
-
-        These setup steps include:
-        - instantiating a CellfinderConfig object with the required parameters,
-        - checking if the input data exists locally, and fetching from
-        GIN repository otherwise,
-        - adding the path to the input data files to the config, and
-        - creating a timestamped directory for the output of the workflow if
-        it doesn't exist and adding its path to the config
-
-        Parameters
-        ----------
-        input_config_path : Path
-            path to the input config file
-
-        Returns
-        -------
-        config : CellfinderConfig
-            a dataclass whose attributes are the parameters
-            for running cellfinder.
-        """
-
-        # Check config file exists
-        assert input_config_path.exists()
-
-        # Instantiate a CellfinderConfig from the input json file
-        # (assumes config is json serializable)
-        with open(input_config_path) as cfg:
-            config_dict = json.load(cfg)
-        config = CellfinderConfig(**config_dict)
-
-        # Print info logs for status
-        logger.info(f"Input config read from {input_config_path}")
-        if input_config_path == DEFAULT_JSON_CONFIG_PATH:
-            logger.info("Using default config file")
-
-        # Retrieve and add lists of input data to the config,
-        # if these are defined yet
-        if not (config.list_signal_files and config.list_signal_files):
-            # build fullpaths to inputs
-            config.signal_dir_path = str(
-                Path(config.install_path)
-                / config.extract_dir_relative
-                / config.signal_subdir
-            )
-            config.background_dir_path = str(
-                Path(config.install_path)
-                / config.extract_dir_relative
-                / config.background_subdir
-            )
-            # retrieve data
-            config = retrieve_input_data(config)
-
-        # Create timestamped output directory if it doesn't exist
-        timestamp = datetime.datetime.now()
-        timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S")
-        output_path_timestamped = Path(config.install_path) / (
-            str(config.output_path_basename_relative) + timestamp_formatted
-        )
-        output_path_timestamped.mkdir(parents=True, exist_ok=True)
-
-        # Add output path and output file path to config
-        config.output_path = output_path_timestamped
-        config.detected_cells_path = (
-            config.output_path / config.detected_cells_filename
-        )
-
-        return config
-
     def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
         """
         Adds the lists of input data files (signal and background)
@@ -346,14 +201,81 @@ def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
 
         return config
 
-    # parse command line input arguments:
-    # sys.argv in most cases except for testing
-    # see https://paiml.com/docs/home/books/testing-in-python/chapter08-monkeypatching/#the-simplest-monkeypatching
-    argv = argv or sys.argv[1:]
-    args = parse_cli_arguments(argv)
+    def setup_workflow(input_config_path: Path) -> CellfinderConfig:
+        """Run setup steps prior to executing the workflow
+
+        These setup steps include:
+        - instantiating a CellfinderConfig object with the required parameters,
+        - checking if the input data exists locally, and fetching from
+        GIN repository otherwise,
+        - adding the path to the input data files to the config, and
+        - creating a timestamped directory for the output of the workflow if
+        it doesn't exist and adding its path to the config
+
+        Parameters
+        ----------
+        input_config_path : Path
+            path to the input config file
+
+        Returns
+        -------
+        config : CellfinderConfig
+            a dataclass whose attributes are the parameters
+            for running cellfinder.
+        """
+
+        # Check config file exists
+        assert input_config_path.exists()
+
+        # Instantiate a CellfinderConfig from the input json file
+        # (assumes config is json serializable)
+        with open(input_config_path) as cfg:
+            config_dict = json.load(cfg)
+        config = CellfinderConfig(**config_dict)
+
+        # Print info logs for status
+        logger.info(f"Input config read from {input_config_path}")
+        if input_config_path == DEFAULT_JSON_CONFIG_PATH_CELLFINDER:
+            logger.info("Using default config file")
+
+        # Retrieve and add lists of input data to the config,
+        # if these are defined yet
+        if not (config.list_signal_files and config.list_signal_files):
+            # build fullpaths to inputs
+            config.signal_dir_path = str(
+                Path(config.install_path)
+                / config.extract_dir_relative
+                / config.signal_subdir
+            )
+            config.background_dir_path = str(
+                Path(config.install_path)
+                / config.extract_dir_relative
+                / config.background_subdir
+            )
+            # retrieve data
+            config = retrieve_input_data(config)
+
+        # Create timestamped output directory if it doesn't exist
+        timestamp = datetime.datetime.now()
+        timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S")
+        output_path_timestamped = Path(config.install_path) / (
+            str(config.output_path_basename_relative) + timestamp_formatted
+        )
+        output_path_timestamped.mkdir(parents=True, exist_ok=True)
+
+        # Add output path and output file path to config
+        config.output_path = output_path_timestamped
+        config.detected_cells_path = (
+            config.output_path / config.detected_cells_filename
+        )
+
+        return config
 
-    # setup logger
-    logger = setup_logger()
+    # setup logger and parse command line arguments
+    argv = (
+        argv or sys.argv[1:]
+    )  # sys.argv in most cases except for testing when we monkeypatch it
+    args, logger = setup_logger_and_parse_cli_arguments(argv)
 
     # run setup steps and return config
     cfg = setup_workflow(Path(args.config))
@@ -397,6 +319,11 @@ def run_workflow_from_cellfinder_run(cfg: CellfinderConfig):
 
 
 if __name__ == "__main__":
+    from utils import (
+        DEFAULT_JSON_CONFIG_PATH_CELLFINDER,
+        setup_logger_and_parse_cli_arguments,
+    )
+
     # run setup
     cfg = setup()
 
diff --git a/workflows/cellfinder/__init__.py b/workflows/cellfinder/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/workflows/cellfinder/default_config.json b/workflows/configs/cellfinder.json
similarity index 100%
rename from workflows/cellfinder/default_config.json
rename to workflows/configs/cellfinder.json
diff --git a/workflows/utils.py b/workflows/utils.py
new file mode 100644
index 00000000..abb76e8c
--- /dev/null
+++ b/workflows/utils.py
@@ -0,0 +1,92 @@
+import argparse
+import logging
+import sys
+from pathlib import Path
+
+DEFAULT_JSON_CONFIGS_PATH = Path(__file__).resolve().parent / "configs"
+
+DEFAULT_JSON_CONFIG_PATH_CELLFINDER = (
+    DEFAULT_JSON_CONFIGS_PATH / "cellfinder.json"
+)
+
+
+def setup_logger_and_parse_cli_arguments(argv):
+    def setup_logger() -> logging.Logger:
+        """Setup a logger for this script
+
+        The logger's level is set to DEBUG, and it
+        is linked to a handler that writes to the
+        console and whose level is
+
+        Returns
+        -------
+        logging.Logger
+            a logger object
+        """
+        # define handler that writes to stdout
+        console_handler = logging.StreamHandler(sys.stdout)
+        console_format = logging.Formatter(
+            "%(name)s %(levelname)s: %(message)s"
+        )
+        console_handler.setFormatter(console_format)
+
+        # define logger and link to handler
+        logger = logging.getLogger(
+            __name__
+        )  # if imported as a module, the logger is named after the module
+        logger.setLevel(logging.DEBUG)
+        logger.addHandler(console_handler)
+        return logger
+
+    def parse_cli_arguments(argv_) -> argparse.Namespace:
+        """Define argument parser for cellfinder
+        workflow script.
+
+        It expects a path to a json file with the
+        parameters required to run the workflow.
+        If none is provided, the default
+
+        Returns
+        -------
+        args : argparse.Namespace
+            command line input arguments parsed
+        """
+        # initialise argument parser
+        parser = argparse.ArgumentParser(
+            description=(
+                "To launch the workflow with "
+                "a specific set of input parameters, run: "
+                "`python cellfinder_main.py --config path/to/config.json`"
+                "where path/to/input/config.json is the json file "
+                "containing the workflow parameters."
+            )
+        )
+        # add arguments
+        parser.add_argument(
+            "-c",
+            "--config",
+            default=str(
+                DEFAULT_JSON_CONFIG_PATH_CELLFINDER
+            ),  # can I do argv_[0]? --- TODO use typer!
+            type=str,
+            metavar="CONFIG",  # a name for usage messages
+            help="",
+        )
+
+        # build parser object
+        args = parser.parse_args(argv_)
+
+        # print error if required arguments not provided
+        if not args.config:
+            logger.error("Paths to input config not provided.")
+            parser.print_help()
+
+        return args
+
+    # setup logger
+    logger = setup_logger()
+
+    # parse command line arguments
+    args = parse_cli_arguments(argv)
+
+    return args, logger

From 36eb5b2cab985c6017da4e3b96b54be0444b1eaf Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 1 Dec 2023 19:22:49 +0000
Subject: [PATCH 05/54] update benchmarks to new locations of modules

---
 benchmarks/cellfinder.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/benchmarks/cellfinder.py b/benchmarks/cellfinder.py
index 76d364bc..8839e434 100644
--- a/benchmarks/cellfinder.py
+++ b/benchmarks/cellfinder.py
@@ -7,14 +7,14 @@
 from cellfinder_core.main import main as cellfinder_run
 from cellfinder_core.tools.IO import read_with_dask
 
-from brainglobe_workflows.cellfinder.cellfinder_main import (
-    DEFAULT_JSON_CONFIG_PATH,
+from workflows.cellfinder import (
     CellfinderConfig,
     run_workflow_from_cellfinder_run,
 )
-from brainglobe_workflows.cellfinder.cellfinder_main import (
+from workflows.cellfinder import (
     setup as setup_cellfinder_workflow,
 )
+from workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
 
 
 class TimeBenchmarkPrepGIN:
@@ -79,7 +79,7 @@ class TimeBenchmarkPrepGIN:
     min_run_count = 2  # default:2
 
     # Custom attributes
-    input_config_path = str(DEFAULT_JSON_CONFIG_PATH)
+    input_config_path = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)
 
     def setup_cache(
         self,

From 4b296d330e8f74de7705695cec0a222be7c2d169 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 1 Dec 2023 19:23:42 +0000
Subject: [PATCH 06/54] update modules locations in tests

---
 tests/test_integration/conftest.py              | 10 +++++-----
 .../test_cellfinder_workflow.py                 | 17 +++++++----------
 tests/test_unit/test_cellfinder.py              |  1 +
 tests/test_unit/test_placeholder.py             |  2 --
 tests/test_unit/test_utils.py                   |  5 +++++
 5 files changed, 18 insertions(+), 17 deletions(-)
 create mode 100644 tests/test_unit/test_cellfinder.py
 delete mode 100644 tests/test_unit/test_placeholder.py
 create mode 100644 tests/test_unit/test_utils.py

diff --git a/tests/test_integration/conftest.py b/tests/test_integration/conftest.py
index d9207917..d66187e0 100644
--- a/tests/test_integration/conftest.py
+++ b/tests/test_integration/conftest.py
@@ -5,7 +5,7 @@
 import pooch
 import pytest
 
-from brainglobe_workflows.cellfinder.cellfinder_main import CellfinderConfig
+from workflows.cellfinder import CellfinderConfig
 
 
 def make_config_dict_fetch_from_local(cellfinder_cache_dir: Path) -> dict:
@@ -169,18 +169,18 @@ def data_hash() -> str:
 @pytest.fixture(scope="session")
 def default_json_config_path() -> Path:
     """Return the path to the json file
-    with the default config parameters
+    with the default config parameters for cellfinder
 
     Returns
     -------
     Path
         path to the json file with the default config parameters
     """
-    from brainglobe_workflows.cellfinder.cellfinder_main import (
-        DEFAULT_JSON_CONFIG_PATH,
+    from workflows.utils import (
+        DEFAULT_JSON_CONFIG_PATH_CELLFINDER,
     )
 
-    return DEFAULT_JSON_CONFIG_PATH
+    return DEFAULT_JSON_CONFIG_PATH_CELLFINDER
 
 
 @pytest.fixture()
diff --git a/tests/test_integration/test_cellfinder_workflow.py b/tests/test_integration/test_cellfinder_workflow.py
index e55d0a46..930dcffe 100644
--- a/tests/test_integration/test_cellfinder_workflow.py
+++ b/tests/test_integration/test_cellfinder_workflow.py
@@ -3,7 +3,7 @@
 import sys
 from pathlib import Path
 
-from brainglobe_workflows.cellfinder.cellfinder_main import CellfinderConfig
+from workflows.cellfinder import CellfinderConfig
 
 
 def test_run_with_default_config(tmp_path, default_json_config_path):
@@ -31,9 +31,8 @@ def test_run_with_default_config(tmp_path, default_json_config_path):
         [
             sys.executable,
             Path(__file__).resolve().parents[2]
-            / "brainglobe_workflows"
-            / "cellfinder"
-            / "cellfinder_main.py",
+            / "workflows"
+            / "cellfinder.py",
         ],
         cwd=tmp_path,
         stdout=subprocess.PIPE,
@@ -76,9 +75,8 @@ def test_run_with_GIN_data(
         [
             sys.executable,
             Path(__file__).resolve().parents[2]
-            / "brainglobe_workflows"
-            / "cellfinder"
-            / "cellfinder_main.py",
+            / "workflows"
+            / "cellfinder.py",
             "--config",
             str(path_to_config_fetch_GIN),
         ],
@@ -130,9 +128,8 @@ def test_run_with_local_data(
         [
             sys.executable,
             Path(__file__).resolve().parents[2]
-            / "brainglobe_workflows"
-            / "cellfinder"
-            / "cellfinder_main.py",
+            / "workflows"
+            / "cellfinder.py",
             "--config",
             str(path_to_config_fetch_local),
         ],
diff --git a/tests/test_unit/test_cellfinder.py b/tests/test_unit/test_cellfinder.py
new file mode 100644
index 00000000..6e69318b
--- /dev/null
+++ b/tests/test_unit/test_cellfinder.py
@@ -0,0 +1 @@
+# cellfinder-specific unit tests
diff --git a/tests/test_unit/test_placeholder.py b/tests/test_unit/test_placeholder.py
deleted file mode 100644
index 3ada1ee4..00000000
--- a/tests/test_unit/test_placeholder.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def test_placeholder():
-    assert True
diff --git a/tests/test_unit/test_utils.py b/tests/test_unit/test_utils.py
new file mode 100644
index 00000000..95296bb2
--- /dev/null
+++ b/tests/test_unit/test_utils.py
@@ -0,0 +1,5 @@
+# general unit tests
+
+
+def test_args_parser():
+    assert True

From 8c0617d261f4862b59b5b09bc2463311b5f508cc Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 4 Dec 2023 17:15:24 +0000
Subject: [PATCH 07/54] replace argparse by typer

---
 pyproject.toml          |  3 ++-
 workflows/cellfinder.py | 28 +++++++++++++---------------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c78a6bb6..90393925 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,8 @@ requires-python = ">=3.8.0"
 dynamic = ["version"]
 dependencies = [
     "pooch",
-    "cellfinder-core"
+    "cellfinder-core",
+    "typer"
 ]
 license = {text = "BSD-3-Clause"}
 
diff --git a/workflows/cellfinder.py b/workflows/cellfinder.py
index e022aae5..fe45d432 100644
--- a/workflows/cellfinder.py
+++ b/workflows/cellfinder.py
@@ -21,16 +21,17 @@
 import datetime
 import json
 import os
-import sys
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Optional, Tuple, Union
 
 import pooch
+import typer
 from brainglobe_utils.IO.cells import save_cells
 from cellfinder_core.main import main as cellfinder_run
 from cellfinder_core.tools.IO import read_with_dask
 from cellfinder_core.train.train_yml import depth_type
+from utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER, setup_logger
 
 Pathlike = Union[str, os.PathLike]
 
@@ -91,7 +92,7 @@ class CellfinderConfig:
     detected_cells_path: Pathlike = ""
 
 
-def setup(argv=None) -> CellfinderConfig:
+def setup(input_config_path) -> CellfinderConfig:
     def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
         """
         Adds the lists of input data files (signal and background)
@@ -271,14 +272,11 @@ def setup_workflow(input_config_path: Path) -> CellfinderConfig:
 
         return config
 
-    # setup logger and parse command line arguments
-    argv = (
-        argv or sys.argv[1:]
-    )  # sys.argv in most cases except for testing when we monkeypatch it
-    args, logger = setup_logger_and_parse_cli_arguments(argv)
+    # setup logger
+    logger = setup_logger()
 
     # run setup steps and return config
-    cfg = setup_workflow(Path(args.config))
+    cfg = setup_workflow(Path(input_config_path))
 
     return cfg
 
@@ -318,14 +316,14 @@ def run_workflow_from_cellfinder_run(cfg: CellfinderConfig):
     )
 
 
-if __name__ == "__main__":
-    from utils import (
-        DEFAULT_JSON_CONFIG_PATH_CELLFINDER,
-        setup_logger_and_parse_cli_arguments,
-    )
-
+def main(input_config_path=DEFAULT_JSON_CONFIG_PATH_CELLFINDER):
     # run setup
-    cfg = setup()
+    cfg = setup(input_config_path)
 
     # run workflow
     run_workflow_from_cellfinder_run(cfg)  # only this will be benchmarked
+
+
+if __name__ == "__main__":
+    # run setup and workflow
+    typer.run(main)

From 16db368a0465571ac63fa93d72524f76d4271bf9 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 4 Dec 2023 17:20:18 +0000
Subject: [PATCH 08/54] add test for setup logger

---
 tests/test_unit/test_utils.py |   9 ++-
 workflows/utils.py            | 106 ++++++++--------------------------
 2 files changed, 31 insertions(+), 84 deletions(-)

diff --git a/tests/test_unit/test_utils.py b/tests/test_unit/test_utils.py
index 95296bb2..95374e95 100644
--- a/tests/test_unit/test_utils.py
+++ b/tests/test_unit/test_utils.py
@@ -1,5 +1,8 @@
-# general unit tests
+import logging
 
+from workflows.utils import setup_logger
 
-def test_args_parser():
-    assert True
+
+def test_setup_logger():
+    logger = setup_logger()
+    assert logger.level == logging.DEBUG
diff --git a/workflows/utils.py b/workflows/utils.py
index abb76e8c..78db595d 100644
--- a/workflows/utils.py
+++ b/workflows/utils.py
@@ -1,4 +1,4 @@
-import argparse
+# import argparse
 import logging
 import sys
 from pathlib import Path
@@ -10,83 +10,27 @@
 )
 
 
-def setup_logger_and_parse_cli_arguments(argv):
-    def setup_logger() -> logging.Logger:
-        """Setup a logger for this script
-
-        The logger's level is set to DEBUG, and it
-        is linked to a handler that writes to the
-        console and whose level is
-
-        Returns
-        -------
-        logging.Logger
-            a logger object
-        """
-        # define handler that writes to stdout
-        console_handler = logging.StreamHandler(sys.stdout)
-        console_format = logging.Formatter(
-            "%(name)s %(levelname)s: %(message)s"
-        )
-        console_handler.setFormatter(console_format)
-
-        # define logger and link to handler
-        logger = logging.getLogger(
-            __name__
-        )  # if imported as a module, the logger is named after the module
-        logger.setLevel(logging.DEBUG)
-        logger.addHandler(console_handler)
-        return logger
-
-    def parse_cli_arguments(argv_) -> argparse.Namespace:
-        """Define argument parser for cellfinder
-        workflow script.
-
-        It expects a path to a json file with the
-        parameters required to run the workflow.
-        If none is provided, the default
-
-        Returns
-        -------
-        args : argparse.Namespace
-            command line input arguments parsed
-        """
-        # initialise argument parser
-        parser = argparse.ArgumentParser(
-            description=(
-                "To launch the workflow with "
-                "a specific set of input parameters, run: "
-                "`python cellfinder_main.py --config path/to/config.json`"
-                "where path/to/input/config.json is the json file "
-                "containing the workflow parameters."
-            )
-        )
-        # add arguments
-        parser.add_argument(
-            "-c",
-            "--config",
-            default=str(
-                DEFAULT_JSON_CONFIG_PATH_CELLFINDER
-            ),  # can I do argv_[0]? --- TODO use typer!
-            type=str,
-            metavar="CONFIG",  # a name for usage messages
-            help="",
-        )
-
-        # build parser object
-        args = parser.parse_args(argv_)
-
-        # print error if required arguments not provided
-        if not args.config:
-            logger.error("Paths to input config not provided.")
-            parser.print_help()
-
-        return args
-
-    # setup logger
-    logger = setup_logger()
-
-    # parse command line arguments
-    args = parse_cli_arguments(argv)
-
-    return args, logger
+def setup_logger() -> logging.Logger:
+    """Setup a logger for this script
+
+    The logger's level is set to DEBUG, and it
+    is linked to a handler that writes to the
+    console
+
+    Returns
+    -------
+    logging.Logger
+        a logger object
+    """
+    # define handler that writes to stdout
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_format = logging.Formatter("%(name)s %(levelname)s: %(message)s")
+    console_handler.setFormatter(console_format)
+
+    # define logger and link to handler
+    logger = logging.getLogger(
+        __name__
+    )  # if imported as a module, the logger is named after the module
+    logger.setLevel(logging.DEBUG)
+    logger.addHandler(console_handler)
+    return logger

From bd8276053a9ce4b4369cfca908f70d90db53ee98 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 4 Dec 2023 18:32:26 +0000
Subject: [PATCH 09/54] pass logger by name

---
 pyproject.toml                |   1 +
 tests/test_unit/test_utils.py |   5 +
 workflows/cellfinder.py       | 367 ++++++++++++++++++----------------
 workflows/utils.py            |   1 +
 4 files changed, 204 insertions(+), 170 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 90393925..473f949d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,6 +6,7 @@ readme = "README.md"
 requires-python = ">=3.8.0"
 dynamic = ["version"]
 dependencies = [
+    "asv",
     "pooch",
     "cellfinder-core",
     "typer"
diff --git a/tests/test_unit/test_utils.py b/tests/test_unit/test_utils.py
index 95374e95..2b29e150 100644
--- a/tests/test_unit/test_utils.py
+++ b/tests/test_unit/test_utils.py
@@ -5,4 +5,9 @@
 
 def test_setup_logger():
     logger = setup_logger()
+
     assert logger.level == logging.DEBUG
+    assert logger.name == "workflows.utils"
+    assert logger.hasHandlers()
+    assert len(logger.handlers) == 1
+    assert logger.handlers[0].name == "console_handler"
diff --git a/workflows/cellfinder.py b/workflows/cellfinder.py
index fe45d432..040d900d 100644
--- a/workflows/cellfinder.py
+++ b/workflows/cellfinder.py
@@ -20,6 +20,7 @@
 
 import datetime
 import json
+import logging
 import os
 from dataclasses import dataclass
 from pathlib import Path
@@ -31,7 +32,8 @@
 from cellfinder_core.main import main as cellfinder_run
 from cellfinder_core.tools.IO import read_with_dask
 from cellfinder_core.train.train_yml import depth_type
-from utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER, setup_logger
+
+from workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER, setup_logger
 
 Pathlike = Union[str, os.PathLike]
 
@@ -92,188 +94,213 @@ class CellfinderConfig:
     detected_cells_path: Pathlike = ""
 
 
-def setup(input_config_path) -> CellfinderConfig:
-    def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
-        """
-        Adds the lists of input data files (signal and background)
-        to the config.
-
-        It first checks if the input data exists locally.
-        - If both directories (signal and background) exist, the lists of
-        signal and background files are added to the config.
-        - If exactly one of the input data directories is missing, an error
-        message is logged.
-        - If neither of them exist, the data is retrieved from the provided GIN
-        repository. If no URL or hash to GIN is provided, an error is shown.
-
-        Parameters
-        ----------
-        config : CellfinderConfig
-            a dataclass whose attributes are the parameters
-            for running cellfinder.
-
-        Returns
-        -------
-        config : CellfinderConfig
-            a dataclass whose attributes are the parameters
-            for running cellfinder.
-        """
-        # Check if input data (signal and background) exist locally.
-        # If both directories exist, get list of signal and background files
-        if (
-            Path(config.signal_dir_path).exists()
-            and Path(config.background_dir_path).exists()
-        ):
-            logger.info("Fetching input data from the local directories")
+def read_cellfinder_config(input_config_path):
+    """Instantiate a CellfinderConfig from the input json file
+    (assumes config is json serializable)
+
+    Parameters
+    ----------
+    input_config_path : _type_
+        _description_
+
+    Returns
+    -------
+    _type_
+        _description_
+    """
+    # read input config
+    with open(input_config_path) as cfg:
+        config_dict = json.load(cfg)
+    config = CellfinderConfig(**config_dict)
+
+    return config
+
+
+def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
+    """
+    Adds the lists of input data files (signal and background)
+    to the config.
 
+    It first checks if the input data exists locally.
+    - If both directories (signal and background) exist, the lists of
+    signal and background files are added to the config.
+    - If exactly one of the input data directories is missing, an error
+    message is logged.
+    - If neither of them exist, the data is retrieved from the provided GIN
+    repository. If no URL or hash to GIN is provided, an error is shown.
+
+    Parameters
+    ----------
+    config : CellfinderConfig
+        a dataclass whose attributes are the parameters
+        for running cellfinder.
+
+    Returns
+    -------
+    config : CellfinderConfig
+        a dataclass whose attributes are the parameters
+        for running cellfinder.
+    """
+    # Fetch logger
+    logger = logging.getLogger("root")
+
+    # Check if input data (signal and background) exist locally.
+    # If both directories exist, get list of signal and background files
+    if (
+        Path(config.signal_dir_path).exists()
+        and Path(config.background_dir_path).exists()
+    ):
+        logger.info("Fetching input data from the local directories")
+
+        config.list_signal_files = [
+            f
+            for f in Path(config.signal_dir_path).resolve().iterdir()
+            if f.is_file()
+        ]
+        config.list_background_files = [
+            f
+            for f in Path(config.background_dir_path).resolve().iterdir()
+            if f.is_file()
+        ]
+
+    # If exactly one of the input data directories is missing, print error
+    elif (
+        Path(config.signal_dir_path).resolve().exists()
+        or Path(config.background_dir_path).resolve().exists()
+    ):
+        if not Path(config.signal_dir_path).resolve().exists():
+            logger.error(
+                f"The directory {config.signal_dir_path} does not exist"
+            )
+        else:
+            logger.error(
+                f"The directory {config.background_dir_path} " "does not exist"
+            )
+
+    # If neither of them exist, retrieve data from GIN repository
+    else:
+        # check if GIN URL and hash are defined (log error otherwise)
+        if (not config.data_url) or (not config.data_hash):
+            logger.error(
+                "Input data not found locally, and URL/hash to "
+                "GIN repository not provided"
+            )
+
+        else:
+            # get list of files in GIN archive with pooch.retrieve
+            list_files_archive = pooch.retrieve(
+                url=config.data_url,
+                known_hash=config.data_hash,
+                path=config.install_path,  # zip will be downloaded here
+                progressbar=True,
+                processor=pooch.Unzip(
+                    extract_dir=config.extract_dir_relative
+                    # path to unzipped dir,
+                    # *relative* to the path set in 'path'
+                ),
+            )
+            logger.info("Fetching input data from the provided GIN repository")
+
+            # Check signal and background parent directories exist now
+            assert Path(config.signal_dir_path).resolve().exists()
+            assert Path(config.background_dir_path).resolve().exists()
+
+            # Add signal files to config
             config.list_signal_files = [
                 f
-                for f in Path(config.signal_dir_path).resolve().iterdir()
-                if f.is_file()
+                for f in list_files_archive
+                if f.startswith(
+                    str(Path(config.signal_dir_path).resolve())
+                )  # if str(config.signal_dir_path) in f
             ]
+
+            # Add background files to config
             config.list_background_files = [
                 f
-                for f in Path(config.background_dir_path).resolve().iterdir()
-                if f.is_file()
+                for f in list_files_archive
+                if f.startswith(
+                    str(Path(config.background_dir_path).resolve())
+                )  # if str(config.background_dir_path) in f
             ]
 
-        # If exactly one of the input data directories is missing, print error
-        elif (
-            Path(config.signal_dir_path).resolve().exists()
-            or Path(config.background_dir_path).resolve().exists()
-        ):
-            if not Path(config.signal_dir_path).resolve().exists():
-                logger.error(
-                    f"The directory {config.signal_dir_path} does not exist"
-                )
-            else:
-                logger.error(
-                    f"The directory {config.background_dir_path} "
-                    "does not exist"
-                )
-
-        # If neither of them exist, retrieve data from GIN repository
-        else:
-            # check if GIN URL and hash are defined (log error otherwise)
-            if (not config.data_url) or (not config.data_hash):
-                logger.error(
-                    "Input data not found locally, and URL/hash to "
-                    "GIN repository not provided"
-                )
-
-            else:
-                # get list of files in GIN archive with pooch.retrieve
-                list_files_archive = pooch.retrieve(
-                    url=config.data_url,
-                    known_hash=config.data_hash,
-                    path=config.install_path,  # zip will be downloaded here
-                    progressbar=True,
-                    processor=pooch.Unzip(
-                        extract_dir=config.extract_dir_relative
-                        # path to unzipped dir,
-                        # *relative* to the path set in 'path'
-                    ),
-                )
-                logger.info(
-                    "Fetching input data from the provided GIN repository"
-                )
-
-                # Check signal and background parent directories exist now
-                assert Path(config.signal_dir_path).resolve().exists()
-                assert Path(config.background_dir_path).resolve().exists()
-
-                # Add signal files to config
-                config.list_signal_files = [
-                    f
-                    for f in list_files_archive
-                    if f.startswith(
-                        str(Path(config.signal_dir_path).resolve())
-                    )  # if str(config.signal_dir_path) in f
-                ]
-
-                # Add background files to config
-                config.list_background_files = [
-                    f
-                    for f in list_files_archive
-                    if f.startswith(
-                        str(Path(config.background_dir_path).resolve())
-                    )  # if str(config.background_dir_path) in f
-                ]
-
-        return config
-
-    def setup_workflow(input_config_path: Path) -> CellfinderConfig:
-        """Run setup steps prior to executing the workflow
-
-        These setup steps include:
-        - instantiating a CellfinderConfig object with the required parameters,
-        - checking if the input data exists locally, and fetching from
-        GIN repository otherwise,
-        - adding the path to the input data files to the config, and
-        - creating a timestamped directory for the output of the workflow if
-        it doesn't exist and adding its path to the config
-
-        Parameters
-        ----------
-        input_config_path : Path
-            path to the input config file
-
-        Returns
-        -------
-        config : CellfinderConfig
-            a dataclass whose attributes are the parameters
-            for running cellfinder.
-        """
-
-        # Check config file exists
-        assert input_config_path.exists()
-
-        # Instantiate a CellfinderConfig from the input json file
-        # (assumes config is json serializable)
-        with open(input_config_path) as cfg:
-            config_dict = json.load(cfg)
-        config = CellfinderConfig(**config_dict)
-
-        # Print info logs for status
-        logger.info(f"Input config read from {input_config_path}")
-        if input_config_path == DEFAULT_JSON_CONFIG_PATH_CELLFINDER:
-            logger.info("Using default config file")
-
-        # Retrieve and add lists of input data to the config,
-        # if these are defined yet
-        if not (config.list_signal_files and config.list_signal_files):
-            # build fullpaths to inputs
-            config.signal_dir_path = str(
-                Path(config.install_path)
-                / config.extract_dir_relative
-                / config.signal_subdir
-            )
-            config.background_dir_path = str(
-                Path(config.install_path)
-                / config.extract_dir_relative
-                / config.background_subdir
-            )
-            # retrieve data
-            config = retrieve_input_data(config)
-
-        # Create timestamped output directory if it doesn't exist
-        timestamp = datetime.datetime.now()
-        timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S")
-        output_path_timestamped = Path(config.install_path) / (
-            str(config.output_path_basename_relative) + timestamp_formatted
-        )
-        output_path_timestamped.mkdir(parents=True, exist_ok=True)
+    return config
+
+
+def setup_workflow(input_config_path: Path) -> CellfinderConfig:
+    """Run setup steps prior to executing the workflow
+
+    These setup steps include:
+    - instantiating a CellfinderConfig object with the required parameters,
+    - checking if the input data exists locally, and fetching from
+    GIN repository otherwise,
+    - adding the path to the input data files to the config, and
+    - creating a timestamped directory for the output of the workflow if
+    it doesn't exist and adding its path to the config
 
-        # Add output path and output file path to config
-        config.output_path = output_path_timestamped
-        config.detected_cells_path = (
-            config.output_path / config.detected_cells_filename
+    Parameters
+    ----------
+    input_config_path : Path
+        path to the input config file
+
+    Returns
+    -------
+    config : CellfinderConfig
+        a dataclass whose attributes are the parameters
+        for running cellfinder.
+    """
+
+    # Fetch logger
+    logger = logging.getLogger("root")
+
+    # Check config file exists
+    assert input_config_path.exists()
+
+    # Instantiate a CellfinderConfig from the input json file
+    # (assumes config is json serializable)
+    config = read_cellfinder_config(input_config_path)
+
+    # Print info logs for status
+    logger.info(f"Input config read from {input_config_path}")
+    if input_config_path == DEFAULT_JSON_CONFIG_PATH_CELLFINDER:
+        logger.info("Using default config file")
+
+    # Retrieve and add lists of input data to the config,
+    # if these are defined yet
+    if not (config.list_signal_files and config.list_signal_files):
+        # build fullpaths to inputs
+        config.signal_dir_path = str(
+            Path(config.install_path)
+            / config.extract_dir_relative
+            / config.signal_subdir
+        )
+        config.background_dir_path = str(
+            Path(config.install_path)
+            / config.extract_dir_relative
+            / config.background_subdir
         )
+        # retrieve data
+        config = retrieve_input_data(config)
+
+    # Create timestamped output directory if it doesn't exist
+    timestamp = datetime.datetime.now()
+    timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S")
+    output_path_timestamped = Path(config.install_path) / (
+        str(config.output_path_basename_relative) + timestamp_formatted
+    )
+    output_path_timestamped.mkdir(parents=True, exist_ok=True)
+
+    # Add output path and output file path to config
+    config.output_path = output_path_timestamped
+    config.detected_cells_path = (
+        config.output_path / config.detected_cells_filename
+    )
 
-        return config
+    return config
 
+
+def setup(input_config_path) -> CellfinderConfig:
     # setup logger
-    logger = setup_logger()
+    _ = setup_logger()
 
     # run setup steps and return config
     cfg = setup_workflow(Path(input_config_path))
diff --git a/workflows/utils.py b/workflows/utils.py
index 78db595d..94d3fa14 100644
--- a/workflows/utils.py
+++ b/workflows/utils.py
@@ -26,6 +26,7 @@ def setup_logger() -> logging.Logger:
     console_handler = logging.StreamHandler(sys.stdout)
     console_format = logging.Formatter("%(name)s %(levelname)s: %(message)s")
     console_handler.setFormatter(console_format)
+    console_handler.set_name("console_handler")
 
     # define logger and link to handler
     logger = logging.getLogger(

From c1624601a8e9620a9b818d41de4c7ae2b3d71cdf Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 4 Dec 2023 19:20:47 +0000
Subject: [PATCH 10/54] unit tests for setup steps (WIP)

---
 tests/data/input_data_GIN.json                | 39 ++++++++++
 tests/data/input_data_locally.json            | 39 ++++++++++
 tests/data/input_data_missing_background.json | 39 ++++++++++
 tests/data/input_data_missing_signal.json     | 39 ++++++++++
 tests/data/input_data_not_locally_or_GIN.json | 37 +++++++++
 .../test_cellfinder_workflow.py               |  2 +-
 tests/test_unit/test_cellfinder.py            | 76 ++++++++++++++++++-
 workflows/cellfinder.py                       |  5 +-
 8 files changed, 272 insertions(+), 4 deletions(-)
 create mode 100644 tests/data/input_data_GIN.json
 create mode 100644 tests/data/input_data_locally.json
 create mode 100644 tests/data/input_data_missing_background.json
 create mode 100644 tests/data/input_data_missing_signal.json
 create mode 100644 tests/data/input_data_not_locally_or_GIN.json

diff --git a/tests/data/input_data_GIN.json b/tests/data/input_data_GIN.json
new file mode 100644
index 00000000..a80a4ba4
--- /dev/null
+++ b/tests/data/input_data_GIN.json
@@ -0,0 +1,39 @@
+{
+  "install_path": ".cellfinder_workflows",
+  "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
+  "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",
+  "extract_dir_relative": "cellfinder_test_data",
+  "signal_subdir": "signal",
+  "background_subdir": "background",
+  "output_path_basename_relative": "cellfinder_output_",
+  "detected_cells_filename": "detected_cells.xml",
+  "voxel_sizes": [
+    5,
+    2,
+    2
+  ],
+  "start_plane": 0,
+  "end_plane": -1,
+  "trained_model": null,
+  "model_weights": null,
+  "model": "resnet50_tv",
+  "batch_size": 32,
+  "n_free_cpus": 2,
+  "network_voxel_sizes": [
+    5,
+    1,
+    1
+  ],
+  "soma_diameter": 16,
+  "ball_xy_size": 6,
+  "ball_z_size": 15,
+  "ball_overlap_fraction": 0.6,
+  "log_sigma_size": 0.2,
+  "n_sds_above_mean_thresh": 10,
+  "soma_spread_factor": 1.4,
+  "max_cluster_size": 100000,
+  "cube_width": 50,
+  "cube_height": 50,
+  "cube_depth": 20,
+  "network_depth": "50"
+}
diff --git a/tests/data/input_data_locally.json b/tests/data/input_data_locally.json
new file mode 100644
index 00000000..a80a4ba4
--- /dev/null
+++ b/tests/data/input_data_locally.json
@@ -0,0 +1,39 @@
+{
+  "install_path": ".cellfinder_workflows",
+  "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
+  "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",
+  "extract_dir_relative": "cellfinder_test_data",
+  "signal_subdir": "signal",
+  "background_subdir": "background",
+  "output_path_basename_relative": "cellfinder_output_",
+  "detected_cells_filename": "detected_cells.xml",
+  "voxel_sizes": [
+    5,
+    2,
+    2
+  ],
+  "start_plane": 0,
+  "end_plane": -1,
+  "trained_model": null,
+  "model_weights": null,
+  "model": "resnet50_tv",
+  "batch_size": 32,
+  "n_free_cpus": 2,
+  "network_voxel_sizes": [
+    5,
+    1,
+    1
+  ],
+  "soma_diameter": 16,
+  "ball_xy_size": 6,
+  "ball_z_size": 15,
+  "ball_overlap_fraction": 0.6,
+  "log_sigma_size": 0.2,
+  "n_sds_above_mean_thresh": 10,
+  "soma_spread_factor": 1.4,
+  "max_cluster_size": 100000,
+  "cube_width": 50,
+  "cube_height": 50,
+  "cube_depth": 20,
+  "network_depth": "50"
+}
diff --git a/tests/data/input_data_missing_background.json b/tests/data/input_data_missing_background.json
new file mode 100644
index 00000000..cba67561
--- /dev/null
+++ b/tests/data/input_data_missing_background.json
@@ -0,0 +1,39 @@
+{
+  "install_path": ".cellfinder_workflows",
+  "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
+  "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",
+  "extract_dir_relative": "cellfinder_test_data",
+  "signal_subdir": "signal",
+  "background_subdir": "__",
+  "output_path_basename_relative": "cellfinder_output_",
+  "detected_cells_filename": "detected_cells.xml",
+  "voxel_sizes": [
+    5,
+    2,
+    2
+  ],
+  "start_plane": 0,
+  "end_plane": -1,
+  "trained_model": null,
+  "model_weights": null,
+  "model": "resnet50_tv",
+  "batch_size": 32,
+  "n_free_cpus": 2,
+  "network_voxel_sizes": [
+    5,
+    1,
+    1
+  ],
+  "soma_diameter": 16,
+  "ball_xy_size": 6,
+  "ball_z_size": 15,
+  "ball_overlap_fraction": 0.6,
+  "log_sigma_size": 0.2,
+  "n_sds_above_mean_thresh": 10,
+  "soma_spread_factor": 1.4,
+  "max_cluster_size": 100000,
+  "cube_width": 50,
+  "cube_height": 50,
+  "cube_depth": 20,
+  "network_depth": "50"
+}
diff --git a/tests/data/input_data_missing_signal.json b/tests/data/input_data_missing_signal.json
new file mode 100644
index 00000000..185e12a5
--- /dev/null
+++ b/tests/data/input_data_missing_signal.json
@@ -0,0 +1,39 @@
+{
+  "install_path": ".cellfinder_workflows",
+  "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
+  "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",
+  "extract_dir_relative": "cellfinder_test_data",
+  "signal_subdir": "__",
+  "background_subdir": "background",
+  "output_path_basename_relative": "cellfinder_output_",
+  "detected_cells_filename": "detected_cells.xml",
+  "voxel_sizes": [
+    5,
+    2,
+    2
+  ],
+  "start_plane": 0,
+  "end_plane": -1,
+  "trained_model": null,
+  "model_weights": null,
+  "model": "resnet50_tv",
+  "batch_size": 32,
+  "n_free_cpus": 2,
+  "network_voxel_sizes": [
+    5,
+    1,
+    1
+  ],
+  "soma_diameter": 16,
+  "ball_xy_size": 6,
+  "ball_z_size": 15,
+  "ball_overlap_fraction": 0.6,
+  "log_sigma_size": 0.2,
+  "n_sds_above_mean_thresh": 10,
+  "soma_spread_factor": 1.4,
+  "max_cluster_size": 100000,
+  "cube_width": 50,
+  "cube_height": 50,
+  "cube_depth": 20,
+  "network_depth": "50"
+}
diff --git a/tests/data/input_data_not_locally_or_GIN.json b/tests/data/input_data_not_locally_or_GIN.json
new file mode 100644
index 00000000..ac1ed3de
--- /dev/null
+++ b/tests/data/input_data_not_locally_or_GIN.json
@@ -0,0 +1,37 @@
+{
+  "install_path": ".cellfinder_workflows",
+  "extract_dir_relative": "cellfinder_test_data",
+  "signal_subdir": "signal",
+  "background_subdir": "background",
+  "output_path_basename_relative": "cellfinder_output_",
+  "detected_cells_filename": "detected_cells.xml",
+  "voxel_sizes": [
+    5,
+    2,
+    2
+  ],
+  "start_plane": 0,
+  "end_plane": -1,
+  "trained_model": null,
+  "model_weights": null,
+  "model": "resnet50_tv",
+  "batch_size": 32,
+  "n_free_cpus": 2,
+  "network_voxel_sizes": [
+    5,
+    1,
+    1
+  ],
+  "soma_diameter": 16,
+  "ball_xy_size": 6,
+  "ball_z_size": 15,
+  "ball_overlap_fraction": 0.6,
+  "log_sigma_size": 0.2,
+  "n_sds_above_mean_thresh": 10,
+  "soma_spread_factor": 1.4,
+  "max_cluster_size": 100000,
+  "cube_width": 50,
+  "cube_height": 50,
+  "cube_depth": 20,
+  "network_depth": "50"
+}
diff --git a/tests/test_integration/test_cellfinder_workflow.py b/tests/test_integration/test_cellfinder_workflow.py
index 930dcffe..c0fe19b8 100644
--- a/tests/test_integration/test_cellfinder_workflow.py
+++ b/tests/test_integration/test_cellfinder_workflow.py
@@ -34,7 +34,7 @@ def test_run_with_default_config(tmp_path, default_json_config_path):
             / "workflows"
             / "cellfinder.py",
         ],
-        cwd=tmp_path,
+        cwd=tmp_path,  # -------------------
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         text=True,
diff --git a/tests/test_unit/test_cellfinder.py b/tests/test_unit/test_cellfinder.py
index 6e69318b..7da49aa7 100644
--- a/tests/test_unit/test_cellfinder.py
+++ b/tests/test_unit/test_cellfinder.py
@@ -1 +1,75 @@
-# cellfinder-specific unit tests
+import json
+import logging
+
+from workflows.cellfinder import read_cellfinder_config, retrieve_input_data
+from workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
+
+# @pytest.fixture() --- for where configs are
+# def
+
+
+def test_read_cellfinder_config(
+    input_config_path=DEFAULT_JSON_CONFIG_PATH_CELLFINDER,
+):
+    # read json as Cellfinder config
+    config = read_cellfinder_config(input_config_path)
+
+    # read json as dict
+    with open(input_config_path) as cfg:
+        config_dict = json.load(cfg)
+
+    # check keys of dict are a subset of Cellfinder config attributes
+    assert all(
+        [ky in config.__dataclass_fields__.keys() for ky in config_dict.keys()]
+    )
+
+
+# define different configs
+# @pytest.mark.parametrize(
+#     "input_config_path, message",
+#     [
+#         (DEFAULT_JSON_CONFIG_PATH_CELLFINDER,
+# "Fetching input data from the local directories"),
+#         (, "The directory does not exist"),
+#         (, "The directory does not exist"),
+#         (,"Input data not found locally,
+# and URL/hash to GIN repository not provided"),
+#         (,"Fetching input data from the provided GIN repository")
+#     ]
+# )
+def test_retrieve_input_data(caplog, input_config_path, message):
+    # set logger to capture
+    caplog.set_level(
+        logging.DEBUG, logger="root"
+    )  # --- why root? :( "workflows.utils")
+
+    # read json as Cellfinder config
+    config = read_cellfinder_config(input_config_path)
+
+    # retrieve data
+    retrieve_input_data(config)
+
+    assert message in caplog.messages
+
+    # if (
+    #     Path(config.signal_dir_path).exists()
+    #     and Path(config.background_dir_path).exists()
+    # ):
+
+    #     # with caplog.at_level(
+    #     #     logging.DEBUG,
+    #     #     logger="root" # why root?
+    #     # ):
+    #     updated_config = retrieve_input_data(config)
+
+    #     assert message in caplog.messages
+
+    # # If exactly one of the input data directories is missing, print error
+    # elif (
+    #     Path(config.signal_dir_path).resolve().exists()
+    #     or Path(config.background_dir_path).resolve().exists()
+    # ):
+
+
+# def test_setup_workflow():
+#     pass
diff --git a/workflows/cellfinder.py b/workflows/cellfinder.py
index 040d900d..16851734 100644
--- a/workflows/cellfinder.py
+++ b/workflows/cellfinder.py
@@ -98,6 +98,7 @@ def read_cellfinder_config(input_config_path):
     """Instantiate a CellfinderConfig from the input json file
     (assumes config is json serializable)
 
+
     Parameters
     ----------
     input_config_path : _type_
@@ -142,7 +143,7 @@ def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
         for running cellfinder.
     """
     # Fetch logger
-    logger = logging.getLogger("root")
+    logger = logging.getLogger("workflow.utils")
 
     # Check if input data (signal and background) exist locally.
     # If both directories exist, get list of signal and background files
@@ -250,7 +251,7 @@ def setup_workflow(input_config_path: Path) -> CellfinderConfig:
     """
 
     # Fetch logger
-    logger = logging.getLogger("root")
+    logger = logging.getLogger("workflow.utils")
 
     # Check config file exists
     assert input_config_path.exists()

From e2a5eeabb23dcf96fff5e9bb53cd27d9a2c1c3af Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 14:01:28 +0000
Subject: [PATCH 11/54] rename path variable in config. add unit tests for
 setup steps.

---
 benchmarks/cellfinder.py                      |   6 +-
 tests/data/input_data_GIN.json                |   2 +-
 tests/data/input_data_locally.json            |   4 +-
 tests/data/input_data_missing_background.json |   4 +-
 tests/data/input_data_missing_signal.json     |   4 +-
 tests/data/input_data_not_locally_or_GIN.json |   2 +-
 tests/test_integration/conftest.py            |   8 +-
 tests/test_unit/test_cellfinder.py            | 196 +++++++++++++-----
 workflows/cellfinder.py                       |  46 ++--
 workflows/configs/cellfinder.json             |   2 +-
 10 files changed, 186 insertions(+), 88 deletions(-)

diff --git a/benchmarks/cellfinder.py b/benchmarks/cellfinder.py
index 8839e434..69ba6988 100644
--- a/benchmarks/cellfinder.py
+++ b/benchmarks/cellfinder.py
@@ -11,9 +11,7 @@
     CellfinderConfig,
     run_workflow_from_cellfinder_run,
 )
-from workflows.cellfinder import (
-    setup as setup_cellfinder_workflow,
-)
+from workflows.cellfinder import setup as setup_cellfinder_workflow
 from workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
 
 
@@ -114,7 +112,7 @@ def setup_cache(
             known_hash=config.data_hash,
             path=config.install_path,
             progressbar=True,
-            processor=pooch.Unzip(extract_dir=config.extract_dir_relative),
+            processor=pooch.Unzip(extract_dir=config.data_dir_relative),
         )
 
         # Check paths to input data should now exist in config
diff --git a/tests/data/input_data_GIN.json b/tests/data/input_data_GIN.json
index a80a4ba4..daf056a5 100644
--- a/tests/data/input_data_GIN.json
+++ b/tests/data/input_data_GIN.json
@@ -2,7 +2,7 @@
   "install_path": ".cellfinder_workflows",
   "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
   "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",
-  "extract_dir_relative": "cellfinder_test_data",
+  "data_dir_relative": "cellfinder_test_data",
   "signal_subdir": "signal",
   "background_subdir": "background",
   "output_path_basename_relative": "cellfinder_output_",
diff --git a/tests/data/input_data_locally.json b/tests/data/input_data_locally.json
index a80a4ba4..e3761543 100644
--- a/tests/data/input_data_locally.json
+++ b/tests/data/input_data_locally.json
@@ -1,8 +1,6 @@
 {
   "install_path": ".cellfinder_workflows",
-  "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
-  "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",
-  "extract_dir_relative": "cellfinder_test_data",
+  "data_dir_relative": "cellfinder_test_data",
   "signal_subdir": "signal",
   "background_subdir": "background",
   "output_path_basename_relative": "cellfinder_output_",
diff --git a/tests/data/input_data_missing_background.json b/tests/data/input_data_missing_background.json
index cba67561..52454f9b 100644
--- a/tests/data/input_data_missing_background.json
+++ b/tests/data/input_data_missing_background.json
@@ -1,8 +1,6 @@
 {
   "install_path": ".cellfinder_workflows",
-  "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
-  "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",
-  "extract_dir_relative": "cellfinder_test_data",
+  "data_dir_relative": "cellfinder_test_data",
   "signal_subdir": "signal",
   "background_subdir": "__",
   "output_path_basename_relative": "cellfinder_output_",
diff --git a/tests/data/input_data_missing_signal.json b/tests/data/input_data_missing_signal.json
index 185e12a5..22c5247b 100644
--- a/tests/data/input_data_missing_signal.json
+++ b/tests/data/input_data_missing_signal.json
@@ -1,8 +1,6 @@
 {
   "install_path": ".cellfinder_workflows",
-  "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
-  "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",
-  "extract_dir_relative": "cellfinder_test_data",
+  "data_dir_relative": "cellfinder_test_data",
   "signal_subdir": "__",
   "background_subdir": "background",
   "output_path_basename_relative": "cellfinder_output_",
diff --git a/tests/data/input_data_not_locally_or_GIN.json b/tests/data/input_data_not_locally_or_GIN.json
index ac1ed3de..e3761543 100644
--- a/tests/data/input_data_not_locally_or_GIN.json
+++ b/tests/data/input_data_not_locally_or_GIN.json
@@ -1,6 +1,6 @@
 {
   "install_path": ".cellfinder_workflows",
-  "extract_dir_relative": "cellfinder_test_data",
+  "data_dir_relative": "cellfinder_test_data",
   "signal_subdir": "signal",
   "background_subdir": "background",
   "output_path_basename_relative": "cellfinder_output_",
diff --git a/tests/test_integration/conftest.py b/tests/test_integration/conftest.py
index d66187e0..c8729b62 100644
--- a/tests/test_integration/conftest.py
+++ b/tests/test_integration/conftest.py
@@ -29,7 +29,7 @@ def make_config_dict_fetch_from_local(cellfinder_cache_dir: Path) -> dict:
     """
     return {
         "install_path": cellfinder_cache_dir,
-        "extract_dir_relative": "cellfinder_test_data",  # relative path
+        "data_dir_relative": "cellfinder_test_data",  # relative path
         "signal_subdir": "signal",
         "background_subdir": "background",
         "output_path_basename_relative": "cellfinder_output_",
@@ -176,9 +176,7 @@ def default_json_config_path() -> Path:
     Path
         path to the json file with the default config parameters
     """
-    from workflows.utils import (
-        DEFAULT_JSON_CONFIG_PATH_CELLFINDER,
-    )
+    from workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
 
     return DEFAULT_JSON_CONFIG_PATH_CELLFINDER
 
@@ -274,7 +272,7 @@ def path_to_config_fetch_local(
         path=config.install_path,  # path to download zip to
         progressbar=True,
         processor=pooch.Unzip(
-            extract_dir=config.extract_dir_relative
+            extract_dir=config.data_dir_relative
             # path to unzipped dir, *relative*  to 'path'
         ),
     )
diff --git a/tests/test_unit/test_cellfinder.py b/tests/test_unit/test_cellfinder.py
index 7da49aa7..b363229b 100644
--- a/tests/test_unit/test_cellfinder.py
+++ b/tests/test_unit/test_cellfinder.py
@@ -1,16 +1,50 @@
 import json
 import logging
+from pathlib import Path
+
+import pooch
+import pytest
+
+from workflows.cellfinder import (
+    add_signal_and_background_files,
+    read_cellfinder_config,
+)
+
+
+@pytest.fixture()
+def input_configs_dir():
+    return Path(__file__).parents[1] / "data"
+
+
+@pytest.fixture(scope="session")
+def cellfinder_GIN_data() -> dict:
+    """Return the URL and hash to the GIN repository with the input data
+
+    Returns
+    -------
+    dict
+        URL and hash of the GIN repository with the cellfinder test data
+    """
+    return {
+        "url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
+        "hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",  # noqa
+    }
+
+
+@pytest.mark.parametrize(
+    "input_config",
+    [
+        "input_data_GIN.json",
+        "input_data_locally.json",
+        "input_data_missing_background.json",
+        "input_data_missing_signal.json",
+        "input_data_not_locally_or_GIN.json",
+    ],
+)
+def test_read_cellfinder_config(input_config, input_configs_dir):
+    # path to config json file
+    input_config_path = input_configs_dir / input_config
 
-from workflows.cellfinder import read_cellfinder_config, retrieve_input_data
-from workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
-
-# @pytest.fixture() --- for where configs are
-# def
-
-
-def test_read_cellfinder_config(
-    input_config_path=DEFAULT_JSON_CONFIG_PATH_CELLFINDER,
-):
     # read json as Cellfinder config
     config = read_cellfinder_config(input_config_path)
 
@@ -18,58 +52,124 @@ def test_read_cellfinder_config(
     with open(input_config_path) as cfg:
         config_dict = json.load(cfg)
 
-    # check keys of dict are a subset of Cellfinder config attributes
+    # check keys of dictionary are a subset of Cellfinder config attributes
     assert all(
         [ky in config.__dataclass_fields__.keys() for ky in config_dict.keys()]
     )
 
 
-# define different configs
-# @pytest.mark.parametrize(
-#     "input_config_path, message",
-#     [
-#         (DEFAULT_JSON_CONFIG_PATH_CELLFINDER,
-# "Fetching input data from the local directories"),
-#         (, "The directory does not exist"),
-#         (, "The directory does not exist"),
-#         (,"Input data not found locally,
-# and URL/hash to GIN repository not provided"),
-#         (,"Fetching input data from the provided GIN repository")
-#     ]
-# )
-def test_retrieve_input_data(caplog, input_config_path, message):
+@pytest.mark.parametrize(
+    "input_config, message",
+    [
+        (
+            "input_data_GIN.json",
+            "Fetching input data from the provided GIN repository",
+        ),
+        (
+            "input_data_locally.json",
+            "Fetching input data from the local directories",
+        ),
+        ("input_data_missing_background.json", "The directory does not exist"),
+        ("input_data_missing_signal.json", "The directory does not exist"),
+        (
+            "input_data_not_locally_or_GIN.json",
+            "Input data not found locally, and URL/hash to"
+            "GIN repository not provided",
+        ),
+    ],
+)
+def test_add_signal_and_background_files(
+    caplog,
+    tmp_path,
+    cellfinder_GIN_data,
+    input_configs_dir,
+    input_config,
+    message,
+):
+    """_summary_
+
+    Parameters
+    ----------
+    caplog : _type_
+        _description_
+    tmp_path : Path
+        path to pytest-generated temporary directory
+    input_configs_dir : _type_
+        _description_
+    input_config : _type_
+        _description_
+    message : _type_
+        _description_
+    """
     # set logger to capture
-    caplog.set_level(
-        logging.DEBUG, logger="root"
-    )  # --- why root? :( "workflows.utils")
+    # TODO: --- why root? :( "workflows.utils")
+    caplog.set_level(logging.DEBUG, logger="root")
 
     # read json as Cellfinder config
-    config = read_cellfinder_config(input_config_path)
+    config = read_cellfinder_config(input_configs_dir / input_config)
+
+    # monkeypatch cellfinder config:
+    # - set install_path to pytest temp directory
+    config.install_path = tmp_path / config.install_path
+
+    # check lists of signal and background files are not defined
+    assert not (config.list_signal_files and config.list_background_files)
+
+    # build fullpaths to input data directories
+    config.signal_dir_path = str(
+        Path(config.install_path)
+        / config.data_dir_relative
+        / config.signal_subdir
+    )
+    config.background_dir_path = str(
+        Path(config.install_path)
+        / config.data_dir_relative
+        / config.background_subdir
+    )
+
+    # monkeypatch cellfinder config:
+    # - if config is "local" or "signal/background missing":
+    #    ensure signal and background data from GIN are downloaded locally
+    if message in [
+        "Fetching input data from the local directories",
+        "The directory does not exist",
+    ]:
+        # fetch data from GIN and download locally
+        # download GIN data to specified local directory
+        # can I download data only once? - but then same pytest directory?
+        pooch.retrieve(
+            url=cellfinder_GIN_data["url"],
+            known_hash=cellfinder_GIN_data["hash"],
+            path=config.install_path,  # path to download zip to
+            progressbar=True,
+            processor=pooch.Unzip(
+                extract_dir=config.data_dir_relative
+                # path to unzipped dir, *relative*  to 'path'
+            ),
+        )
 
     # retrieve data
-    retrieve_input_data(config)
+    add_signal_and_background_files(config)
 
-    assert message in caplog.messages
+    assert message in caplog.messages[-1]
 
-    # if (
-    #     Path(config.signal_dir_path).exists()
-    #     and Path(config.background_dir_path).exists()
-    # ):
 
-    #     # with caplog.at_level(
-    #     #     logging.DEBUG,
-    #     #     logger="root" # why root?
-    #     # ):
-    #     updated_config = retrieve_input_data(config)
+# def test_setup_workflow(input_config_path):
+#     """Test setup steps for the cellfinder workflow are completed
 
-    #     assert message in caplog.messages
+#     These setup steps include:
+#     - instantiating a CellfinderConfig object with the required parameters,
+#     - add signal and background files to config if these do not exist
+#     - creating a timestamped directory for the output of the workflow if
+#     it doesn't exist and adding its path to the config
+#     """
 
-    # # If exactly one of the input data directories is missing, print error
-    # elif (
-    #     Path(config.signal_dir_path).resolve().exists()
-    #     or Path(config.background_dir_path).resolve().exists()
-    # ):
 
+#     config = setup_workflow(input_config_path)
 
-# def test_setup_workflow():
-#     pass
+#     assert config.list_signal_files # all files exist
+#     assert config.list_background_files  # all files exist
+#     assert config.output_path
+# #---should be timestamped with this format strftime("%Y%m%d_%H%M%S")
+#     assert config.detected_cells_path
+# # should be config.output_path / config.detected_cells_filename
diff --git a/workflows/cellfinder.py b/workflows/cellfinder.py
index 16851734..fd0ab582 100644
--- a/workflows/cellfinder.py
+++ b/workflows/cellfinder.py
@@ -45,13 +45,14 @@ class CellfinderConfig:
     the cellfinder preprocessing steps.
     """
 
-    # cellfinder workflows cache directory
-    install_path: Pathlike
-
-    # cached subdirectory to save data to
-    extract_dir_relative: Pathlike
+    # input data
+    # data_dir_relative: parent directory to signal and background,
+    # relative to install path
+    data_dir_relative: Pathlike
     signal_subdir: str
     background_subdir: str
+
+    # output
     output_path_basename_relative: Pathlike
     detected_cells_filename: Pathlike
 
@@ -80,6 +81,9 @@ class CellfinderConfig:
     cube_depth: int
     network_depth: depth_type
 
+    # install path (root for all inputs and outputs)
+    install_path: Pathlike = ".cellfinder_workflows"
+
     # origin of data to download (if required)
     data_url: Optional[str] = None
     data_hash: Optional[str] = None
@@ -89,9 +93,9 @@ class CellfinderConfig:
     list_signal_files: Optional[list] = None
     list_background_files: Optional[list] = None
     output_path: Pathlike = ""
+    detected_cells_path: Pathlike = ""
     signal_dir_path: Pathlike = ""
     background_dir_path: Pathlike = ""
-    detected_cells_path: Pathlike = ""
 
 
 def read_cellfinder_config(input_config_path):
@@ -117,10 +121,12 @@ def read_cellfinder_config(input_config_path):
     return config
 
 
-def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
+def add_signal_and_background_files(
+    config: CellfinderConfig,
+) -> CellfinderConfig:
     """
     Adds the lists of input data files (signal and background)
-    to the config.
+    to the config, when these are not defined.
 
     It first checks if the input data exists locally.
     - If both directories (signal and background) exist, the lists of
@@ -145,7 +151,7 @@ def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
     # Fetch logger
     logger = logging.getLogger("workflow.utils")
 
-    # Check if input data (signal and background) exist locally.
+    # Check if input data directories (signal and background) exist locally.
     # If both directories exist, get list of signal and background files
     if (
         Path(config.signal_dir_path).exists()
@@ -178,7 +184,8 @@ def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
                 f"The directory {config.background_dir_path} " "does not exist"
             )
 
-    # If neither of them exist, retrieve data from GIN repository
+    # If neither of the input data directories exist,
+    # retrieve data from GIN repository and add list of files to config
     else:
         # check if GIN URL and hash are defined (log error otherwise)
         if (not config.data_url) or (not config.data_hash):
@@ -195,7 +202,7 @@ def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
                 path=config.install_path,  # zip will be downloaded here
                 progressbar=True,
                 processor=pooch.Unzip(
-                    extract_dir=config.extract_dir_relative
+                    extract_dir=config.data_dir_relative
                     # path to unzipped dir,
                     # *relative* to the path set in 'path'
                 ),
@@ -221,7 +228,7 @@ def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
                 for f in list_files_archive
                 if f.startswith(
                     str(Path(config.background_dir_path).resolve())
-                )  # if str(config.background_dir_path) in f
+                )
             ]
 
     return config
@@ -265,22 +272,23 @@ def setup_workflow(input_config_path: Path) -> CellfinderConfig:
     if input_config_path == DEFAULT_JSON_CONFIG_PATH_CELLFINDER:
         logger.info("Using default config file")
 
-    # Retrieve and add lists of input data to the config,
+    # Add lists of input data files to the config,
     # if these are defined yet
-    if not (config.list_signal_files and config.list_signal_files):
-        # build fullpaths to inputs
+    if not (config.list_signal_files and config.list_background_files):
+        # build fullpaths to input directories
         config.signal_dir_path = str(
             Path(config.install_path)
-            / config.extract_dir_relative
+            / config.data_dir_relative
             / config.signal_subdir
         )
         config.background_dir_path = str(
             Path(config.install_path)
-            / config.extract_dir_relative
+            / config.data_dir_relative
             / config.background_subdir
         )
-        # retrieve data
-        config = retrieve_input_data(config)
+
+        # add signal and background files to config
+        config = add_signal_and_background_files(config)
 
     # Create timestamped output directory if it doesn't exist
     timestamp = datetime.datetime.now()
diff --git a/workflows/configs/cellfinder.json b/workflows/configs/cellfinder.json
index a80a4ba4..daf056a5 100644
--- a/workflows/configs/cellfinder.json
+++ b/workflows/configs/cellfinder.json
@@ -2,7 +2,7 @@
   "install_path": ".cellfinder_workflows",
   "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
   "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",
-  "extract_dir_relative": "cellfinder_test_data",
+  "data_dir_relative": "cellfinder_test_data",
   "signal_subdir": "signal",
   "background_subdir": "background",
   "output_path_basename_relative": "cellfinder_output_",

From 7c5e59aed6c37eba08f9478907f1f687a368500c Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 15:21:50 +0000
Subject: [PATCH 12/54] replace message by message pattern for regex

---
 tests/test_unit/test_cellfinder.py | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/tests/test_unit/test_cellfinder.py b/tests/test_unit/test_cellfinder.py
index b363229b..cbca913c 100644
--- a/tests/test_unit/test_cellfinder.py
+++ b/tests/test_unit/test_cellfinder.py
@@ -1,5 +1,6 @@
 import json
 import logging
+import re
 from pathlib import Path
 
 import pooch
@@ -59,7 +60,7 @@ def test_read_cellfinder_config(input_config, input_configs_dir):
 
 
 @pytest.mark.parametrize(
-    "input_config, message",
+    "input_config, message_pattern",
     [
         (
             "input_data_GIN.json",
@@ -69,11 +70,14 @@ def test_read_cellfinder_config(input_config, input_configs_dir):
             "input_data_locally.json",
             "Fetching input data from the local directories",
         ),
-        ("input_data_missing_background.json", "The directory does not exist"),
-        ("input_data_missing_signal.json", "The directory does not exist"),
+        (
+            "input_data_missing_background.json",
+            "The directory .+ does not exist$",
+        ),
+        ("input_data_missing_signal.json", "The directory .+ does not exist$"),
         (
             "input_data_not_locally_or_GIN.json",
-            "Input data not found locally, and URL/hash to"
+            "Input data not found locally, and URL/hash to "
             "GIN repository not provided",
         ),
     ],
@@ -84,7 +88,7 @@ def test_add_signal_and_background_files(
     cellfinder_GIN_data,
     input_configs_dir,
     input_config,
-    message,
+    message_pattern,
 ):
     """_summary_
 
@@ -130,9 +134,10 @@ def test_add_signal_and_background_files(
     # monkeypatch cellfinder config:
     # - if config is "local" or "signal/background missing":
     #    ensure signal and background data from GIN are downloaded locally
-    if message in [
-        "Fetching input data from the local directories",
-        "The directory does not exist",
+    if input_config in [
+        "input_data_locally.json",
+        "input_data_missing_signal.json",
+        "input_data_missing_background.json",
     ]:
         # fetch data from GIN and download locally
         # download GIN data to specified local directory
@@ -151,7 +156,9 @@ def test_add_signal_and_background_files(
     # retrieve data
     add_signal_and_background_files(config)
 
-    assert message in caplog.messages[-1]
+    # match
+    out = re.fullmatch(message_pattern, caplog.messages[-1])
+    assert out.group() is not None
 
 
 # def test_setup_workflow(input_config_path):

From e0a2206c3aea026784b5af69b0b38d1d8dc78290 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 16:08:18 +0000
Subject: [PATCH 13/54] fix to retrieve custom logger

---
 tests/test_unit/test_cellfinder.py | 20 +++++++++++++++++---
 workflows/cellfinder.py            |  4 ++--
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/tests/test_unit/test_cellfinder.py b/tests/test_unit/test_cellfinder.py
index cbca913c..6fb92b04 100644
--- a/tests/test_unit/test_cellfinder.py
+++ b/tests/test_unit/test_cellfinder.py
@@ -1,5 +1,4 @@
 import json
-import logging
 import re
 from pathlib import Path
 
@@ -10,6 +9,7 @@
     add_signal_and_background_files,
     read_cellfinder_config,
 )
+from workflows.utils import setup_logger
 
 
 @pytest.fixture()
@@ -107,7 +107,20 @@ def test_add_signal_and_background_files(
     """
     # set logger to capture
     # TODO: --- why root? :( "workflows.utils")
-    caplog.set_level(logging.DEBUG, logger="root")
+    # logging.getLogger("workflows.utils")
+    # By default, the root log level is WARN,
+    # so every log with lower level (for example INFO or DEBUG) will be ignored
+    # I need to create the logger here because if it doesnt exist,
+    # it creates one from root
+    # whose default level is WARN, and so DEBUG and INFO levels are ignored
+    # (By default, a new logger has the NOTSET level, and as the root logger
+    # has a WARN level, the logger’s effective level will be WARN.)
+    # One option is to set the root logger level here to DEBUG -- then its
+    # "children" will have DEBUG level too
+    # caplog.set_level(logging.DEBUG, logger="workflows.utils") #"root")
+    # A better option is to create our logger here before anything else
+    logger = setup_logger()
+    assert logger.name == "workflows.utils"
 
     # read json as Cellfinder config
     config = read_cellfinder_config(input_configs_dir / input_config)
@@ -156,7 +169,8 @@ def test_add_signal_and_background_files(
     # retrieve data
     add_signal_and_background_files(config)
 
-    # match
+    # check log messages
+    assert len(caplog.messages) > 0
     out = re.fullmatch(message_pattern, caplog.messages[-1])
     assert out.group() is not None
 
diff --git a/workflows/cellfinder.py b/workflows/cellfinder.py
index fd0ab582..5432fe80 100644
--- a/workflows/cellfinder.py
+++ b/workflows/cellfinder.py
@@ -149,7 +149,7 @@ def add_signal_and_background_files(
         for running cellfinder.
     """
     # Fetch logger
-    logger = logging.getLogger("workflow.utils")
+    logger = logging.getLogger("workflows.utils")
 
     # Check if input data directories (signal and background) exist locally.
     # If both directories exist, get list of signal and background files
@@ -258,7 +258,7 @@ def setup_workflow(input_config_path: Path) -> CellfinderConfig:
     """
 
     # Fetch logger
-    logger = logging.getLogger("workflow.utils")
+    logger = logging.getLogger("workflows.utils")
 
     # Check config file exists
     assert input_config_path.exists()

From 095e2e1bffe3ea122043bde786031b04dfd92dea Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 16:08:45 +0000
Subject: [PATCH 14/54] remove notes about logger issue

---
 tests/test_unit/test_cellfinder.py | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/tests/test_unit/test_cellfinder.py b/tests/test_unit/test_cellfinder.py
index 6fb92b04..174fd431 100644
--- a/tests/test_unit/test_cellfinder.py
+++ b/tests/test_unit/test_cellfinder.py
@@ -105,20 +105,7 @@ def test_add_signal_and_background_files(
     message : _type_
         _description_
     """
-    # set logger to capture
-    # TODO: --- why root? :( "workflows.utils")
-    # logging.getLogger("workflows.utils")
-    # By default, the root log level is WARN,
-    # so every log with lower level (for example INFO or DEBUG) will be ignored
-    # I need to create the logger here because if it doesnt exist,
-    # it creates one from root
-    # whose default level is WARN, and so DEBUG and INFO levels are ignored
-    # (By default, a new logger has the NOTSET level, and as the root logger
-    # has a WARN level, the logger’s effective level will be WARN.)
-    # One option is to set the root logger level here to DEBUG -- then its
-    # "children" will have DEBUG level too
-    # caplog.set_level(logging.DEBUG, logger="workflows.utils") #"root")
-    # A better option is to create our logger here before anything else
+    # instantiate our custom logger
     logger = setup_logger()
     assert logger.name == "workflows.utils"
 

From a9dbbd2ef54afff130311018cefcdaf20e78219d Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 16:19:48 +0000
Subject: [PATCH 15/54] small edits to comments

---
 tests/test_unit/test_cellfinder.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/tests/test_unit/test_cellfinder.py b/tests/test_unit/test_cellfinder.py
index 174fd431..78db01dd 100644
--- a/tests/test_unit/test_cellfinder.py
+++ b/tests/test_unit/test_cellfinder.py
@@ -113,7 +113,7 @@ def test_add_signal_and_background_files(
     config = read_cellfinder_config(input_configs_dir / input_config)
 
     # monkeypatch cellfinder config:
-    # - set install_path to pytest temp directory
+    # set install_path to pytest temporary directory
     config.install_path = tmp_path / config.install_path
 
     # check lists of signal and background files are not defined
@@ -132,16 +132,14 @@ def test_add_signal_and_background_files(
     )
 
     # monkeypatch cellfinder config:
-    # - if config is "local" or "signal/background missing":
-    #    ensure signal and background data from GIN are downloaded locally
+    # if config is "local" or "signal/background missing":
+    # ensure signal and background data from GIN are downloaded locally
     if input_config in [
         "input_data_locally.json",
         "input_data_missing_signal.json",
         "input_data_missing_background.json",
     ]:
         # fetch data from GIN and download locally
-        # download GIN data to specified local directory
-        # can I download data only once? - but then same pytest directory?
         pooch.retrieve(
             url=cellfinder_GIN_data["url"],
             known_hash=cellfinder_GIN_data["hash"],
@@ -153,7 +151,7 @@ def test_add_signal_and_background_files(
             ),
         )
 
-    # retrieve data
+    # add signal and background files lists to config
     add_signal_and_background_files(config)
 
     # check log messages
@@ -175,9 +173,12 @@ def test_add_signal_and_background_files(
 
 #     config = setup_workflow(input_config_path)
 
-#     assert config.list_signal_files # all files exist
-#     assert config.list_background_files  # all files exist
-#     assert config.output_path
+#     assert config.list_signal_files # check all files exist
+#     assert config.list_background_files  # check all files exist
+#     assert config.output_path # check path exists
+#     assert config.output_path # check timestamped format:
+# install_path /  str(config.output_path_basename_relative)
+# + timestamp_formatted
 # #---should be timestamped with this format strftime("%Y%m%d_%H%M%S")
-#     assert config.detected_cells_path
+#     assert config.detected_cells_path # check this field is defined
 # # should be config.output_path / config.detected_cells_filename

From 98385c5b8f818b391f3a566ed65401000c37e5bc Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 16:29:12 +0000
Subject: [PATCH 16/54] make two modules for unit tests (workflows and
 benchmarks)

---
 tests/test_unit/benchmarks/__init__.py                        | 0
 tests/{ => test_unit}/data/input_data_GIN.json                | 0
 tests/{ => test_unit}/data/input_data_locally.json            | 0
 tests/{ => test_unit}/data/input_data_missing_background.json | 0
 tests/{ => test_unit}/data/input_data_missing_signal.json     | 0
 tests/{ => test_unit}/data/input_data_not_locally_or_GIN.json | 0
 tests/test_unit/workflows/__init__.py                         | 0
 tests/test_unit/{ => workflows}/test_cellfinder.py            | 0
 tests/test_unit/{ => workflows}/test_utils.py                 | 0
 9 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 tests/test_unit/benchmarks/__init__.py
 rename tests/{ => test_unit}/data/input_data_GIN.json (100%)
 rename tests/{ => test_unit}/data/input_data_locally.json (100%)
 rename tests/{ => test_unit}/data/input_data_missing_background.json (100%)
 rename tests/{ => test_unit}/data/input_data_missing_signal.json (100%)
 rename tests/{ => test_unit}/data/input_data_not_locally_or_GIN.json (100%)
 create mode 100644 tests/test_unit/workflows/__init__.py
 rename tests/test_unit/{ => workflows}/test_cellfinder.py (100%)
 rename tests/test_unit/{ => workflows}/test_utils.py (100%)

diff --git a/tests/test_unit/benchmarks/__init__.py b/tests/test_unit/benchmarks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/data/input_data_GIN.json b/tests/test_unit/data/input_data_GIN.json
similarity index 100%
rename from tests/data/input_data_GIN.json
rename to tests/test_unit/data/input_data_GIN.json
diff --git a/tests/data/input_data_locally.json b/tests/test_unit/data/input_data_locally.json
similarity index 100%
rename from tests/data/input_data_locally.json
rename to tests/test_unit/data/input_data_locally.json
diff --git a/tests/data/input_data_missing_background.json b/tests/test_unit/data/input_data_missing_background.json
similarity index 100%
rename from tests/data/input_data_missing_background.json
rename to tests/test_unit/data/input_data_missing_background.json
diff --git a/tests/data/input_data_missing_signal.json b/tests/test_unit/data/input_data_missing_signal.json
similarity index 100%
rename from tests/data/input_data_missing_signal.json
rename to tests/test_unit/data/input_data_missing_signal.json
diff --git a/tests/data/input_data_not_locally_or_GIN.json b/tests/test_unit/data/input_data_not_locally_or_GIN.json
similarity index 100%
rename from tests/data/input_data_not_locally_or_GIN.json
rename to tests/test_unit/data/input_data_not_locally_or_GIN.json
diff --git a/tests/test_unit/workflows/__init__.py b/tests/test_unit/workflows/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_unit/test_cellfinder.py b/tests/test_unit/workflows/test_cellfinder.py
similarity index 100%
rename from tests/test_unit/test_cellfinder.py
rename to tests/test_unit/workflows/test_cellfinder.py
diff --git a/tests/test_unit/test_utils.py b/tests/test_unit/workflows/test_utils.py
similarity index 100%
rename from tests/test_unit/test_utils.py
rename to tests/test_unit/workflows/test_utils.py

From 289b2f85864ae9d2b2b27f20993a98702771c489 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 18:57:42 +0000
Subject: [PATCH 17/54] renaming modules and add test for setup_workflow

---
 MANIFEST.in                                   | 10 +--
 .../__init__.py                               |  0
 .../cellfinder.py                             |  6 +-
 brainglobe_benchmarks/results/benchmarks.json | 88 ++++++++++++++++++
 .../d83f97ba-conda-py3.10.json                |  1 +
 .../machine.json                              |  9 ++
 .../__init__.py                               |  0
 .../cellfinder.py                             |  9 +-
 .../configs/cellfinder.json                   |  0
 {workflows => brainglobe_workflows}/utils.py  |  0
 .../{test_unit => }/data/input_data_GIN.json  |  0
 .../data/input_data_locally.json              |  0
 .../data/input_data_missing_background.json   |  0
 .../data/input_data_missing_signal.json       |  0
 .../data/input_data_not_locally_or_GIN.json   |  0
 .../brainglobe_workflows}/__init__.py         |  0
 .../brainglobe_workflows/test_cellfinder.py   | 15 ++++
 tests/test_integration/conftest.py            |  4 +-
 .../test_cellfinder_workflow.py               |  2 +-
 .../__init__.py                               |  0
 .../brainglobe_workflows/__init__.py          |  0
 .../test_cellfinder.py                        | 90 ++++++++++++++-----
 .../test_utils.py                             |  2 +-
 23 files changed, 198 insertions(+), 38 deletions(-)
 rename {benchmarks => brainglobe_benchmarks}/__init__.py (100%)
 rename {benchmarks => brainglobe_benchmarks}/cellfinder.py (97%)
 create mode 100644 brainglobe_benchmarks/results/benchmarks.json
 create mode 100644 brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/d83f97ba-conda-py3.10.json
 create mode 100644 brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/machine.json
 rename {workflows => brainglobe_workflows}/__init__.py (100%)
 rename {workflows => brainglobe_workflows}/cellfinder.py (98%)
 rename {workflows => brainglobe_workflows}/configs/cellfinder.json (100%)
 rename {workflows => brainglobe_workflows}/utils.py (100%)
 rename tests/{test_unit => }/data/input_data_GIN.json (100%)
 rename tests/{test_unit => }/data/input_data_locally.json (100%)
 rename tests/{test_unit => }/data/input_data_missing_background.json (100%)
 rename tests/{test_unit => }/data/input_data_missing_signal.json (100%)
 rename tests/{test_unit => }/data/input_data_not_locally_or_GIN.json (100%)
 rename tests/{test_unit/benchmarks => test_integration/brainglobe_workflows}/__init__.py (100%)
 create mode 100644 tests/test_integration/brainglobe_workflows/test_cellfinder.py
 rename tests/test_unit/{workflows => brainglobe_benchmarks}/__init__.py (100%)
 create mode 100644 tests/test_unit/brainglobe_workflows/__init__.py
 rename tests/test_unit/{workflows => brainglobe_workflows}/test_cellfinder.py (67%)
 rename tests/test_unit/{workflows => brainglobe_workflows}/test_utils.py (84%)

diff --git a/MANIFEST.in b/MANIFEST.in
index b47713ce..6caa3b13 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,8 +2,8 @@ include LICENSE
 include README.md
 exclude .pre-commit-config.yaml
 
-recursive-include workflows *.py
-recursive-include workflows *.json
+recursive-include brainglobe_workflows *.py
+recursive-include brainglobe_workflows *.json
 
 recursive-exclude * __pycache__
 recursive-exclude * *.py[co]
@@ -11,6 +11,6 @@ recursive-exclude docs *
 recursive-exclude tests *
 
 include asv.conf.json
-recursive-include benchmarks *.py
-recursive-include benchmarks *.json
-recursive-exclude benchmarks/results *
+recursive-include brainglobe_benchmarks *.py
+recursive-include brainglobe_benchmarks *.json
+recursive-exclude brainglobe_benchmarks/results *
diff --git a/benchmarks/__init__.py b/brainglobe_benchmarks/__init__.py
similarity index 100%
rename from benchmarks/__init__.py
rename to brainglobe_benchmarks/__init__.py
diff --git a/benchmarks/cellfinder.py b/brainglobe_benchmarks/cellfinder.py
similarity index 97%
rename from benchmarks/cellfinder.py
rename to brainglobe_benchmarks/cellfinder.py
index 69ba6988..e471aad7 100644
--- a/benchmarks/cellfinder.py
+++ b/brainglobe_benchmarks/cellfinder.py
@@ -7,12 +7,12 @@
 from cellfinder_core.main import main as cellfinder_run
 from cellfinder_core.tools.IO import read_with_dask
 
-from workflows.cellfinder import (
+from brainglobe_workflows.cellfinder import (
     CellfinderConfig,
     run_workflow_from_cellfinder_run,
 )
-from workflows.cellfinder import setup as setup_cellfinder_workflow
-from workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
+from brainglobe_workflows.cellfinder import setup as setup_cellfinder_workflow
+from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
 
 
 class TimeBenchmarkPrepGIN:
diff --git a/brainglobe_benchmarks/results/benchmarks.json b/brainglobe_benchmarks/results/benchmarks.json
new file mode 100644
index 00000000..8fe97cef
--- /dev/null
+++ b/brainglobe_benchmarks/results/benchmarks.json
@@ -0,0 +1,88 @@
+{
+    "cellfinder.TimeDetectCells.time_cellfinder_run": {
+        "code": "class TimeDetectCells:\n    def time_cellfinder_run(self):\n        cellfinder_run(\n            self.signal_array, self.background_array, self.cfg.voxel_sizes\n        )\n\n    def setup(self):\n        # basic setup\n        TimeBenchmarkPrepGIN.setup(self)\n    \n        # add input data as arrays to config\n        self.signal_array = read_with_dask(self.cfg.signal_dir_path)\n        self.background_array = read_with_dask(self.cfg.background_dir_path)\n\nclass TimeBenchmarkPrepGIN:\n    def setup_cache(\n        self,\n    ):\n        \"\"\"\n        Download the input data from the GIN repository to the local\n        directory specified in the default_config.json\n    \n        Notes\n        -----\n        The `setup_cache` method only performs the computations once\n        per benchmark round and then caches the result to disk [1]_. It cannot\n        be parametrised [2]_.\n    \n    \n        [1] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#setup-and-teardown-functions\n        [2] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#parameterized-benchmarks\n        \"\"\"\n    \n        # Check config file exists\n        assert Path(self.input_config_path).exists()\n    \n        # Instantiate a CellfinderConfig from the input json file\n        # (assumes config is json serializable)\n        with open(self.input_config_path) as cfg:\n            config_dict = json.load(cfg)\n        config = CellfinderConfig(**config_dict)\n    \n        # Download data with pooch\n        _ = pooch.retrieve(\n            url=config.data_url,\n            known_hash=config.data_hash,\n            path=config.install_path,\n            progressbar=True,\n            processor=pooch.Unzip(extract_dir=config.extract_dir_relative),\n        )\n    \n        # Check paths to input data should now exist in config\n        assert Path(config.signal_dir_path).exists()\n        assert Path(config.background_dir_path).exists()",
+        "min_run_count": 2,
+        "name": "cellfinder.TimeDetectCells.time_cellfinder_run",
+        "number": 0,
+        "param_names": [],
+        "params": [],
+        "repeat": 0,
+        "rounds": 2,
+        "sample_time": 0.01,
+        "setup_cache_key": "cellfinder:84",
+        "timeout": 600,
+        "type": "time",
+        "unit": "seconds",
+        "version": "a37269e5549803517d14817f7e9aede2930f6ba97c3faeba7235de092a74f6c8",
+        "warmup_time": 0.1
+    },
+    "cellfinder.TimeFullWorkflow.time_workflow_from_cellfinder_run": {
+        "code": "class TimeFullWorkflow:\n    def time_workflow_from_cellfinder_run(self):\n        run_workflow_from_cellfinder_run(self.cfg)\n\nclass TimeBenchmarkPrepGIN:\n    def setup(self):\n        \"\"\"\n        Run the cellfinder workflow setup steps.\n    \n        The command line input arguments are injected as dependencies.\n        \"\"\"\n    \n        # Run setup\n        cfg = setup_cellfinder_workflow(\n            [\n                \"--config\",\n                self.input_config_path,\n            ]\n        )\n    \n        # Save configuration as attribute\n        self.cfg = cfg\n\n    def setup_cache(\n        self,\n    ):\n        \"\"\"\n        Download the input data from the GIN repository to the local\n        directory specified in the default_config.json\n    \n        Notes\n        -----\n        The `setup_cache` method only performs the computations once\n        per benchmark round and then caches the result to disk [1]_. It cannot\n        be parametrised [2]_.\n    \n    \n        [1] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#setup-and-teardown-functions\n        [2] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#parameterized-benchmarks\n        \"\"\"\n    \n        # Check config file exists\n        assert Path(self.input_config_path).exists()\n    \n        # Instantiate a CellfinderConfig from the input json file\n        # (assumes config is json serializable)\n        with open(self.input_config_path) as cfg:\n            config_dict = json.load(cfg)\n        config = CellfinderConfig(**config_dict)\n    \n        # Download data with pooch\n        _ = pooch.retrieve(\n            url=config.data_url,\n            known_hash=config.data_hash,\n            path=config.install_path,\n            progressbar=True,\n            processor=pooch.Unzip(extract_dir=config.extract_dir_relative),\n        )\n    \n        # Check paths to input data should now exist in config\n        assert Path(config.signal_dir_path).exists()\n        assert Path(config.background_dir_path).exists()",
+        "min_run_count": 2,
+        "name": "cellfinder.TimeFullWorkflow.time_workflow_from_cellfinder_run",
+        "number": 0,
+        "param_names": [],
+        "params": [],
+        "repeat": 0,
+        "rounds": 2,
+        "sample_time": 0.01,
+        "setup_cache_key": "cellfinder:84",
+        "timeout": 600,
+        "type": "time",
+        "unit": "seconds",
+        "version": "972079517fbc189a9acc0af554fc3c8382e2add170992dfde452a1b5180a7910",
+        "warmup_time": 0.1
+    },
+    "cellfinder.TimeReadInputDask.time_read_background_with_dask": {
+        "code": "class TimeReadInputDask:\n    def time_read_background_with_dask(self):\n        read_with_dask(self.cfg.background_dir_path)\n\nclass TimeBenchmarkPrepGIN:\n    def setup(self):\n        \"\"\"\n        Run the cellfinder workflow setup steps.\n    \n        The command line input arguments are injected as dependencies.\n        \"\"\"\n    \n        # Run setup\n        cfg = setup_cellfinder_workflow(\n            [\n                \"--config\",\n                self.input_config_path,\n            ]\n        )\n    \n        # Save configuration as attribute\n        self.cfg = cfg\n\n    def setup_cache(\n        self,\n    ):\n        \"\"\"\n        Download the input data from the GIN repository to the local\n        directory specified in the default_config.json\n    \n        Notes\n        -----\n        The `setup_cache` method only performs the computations once\n        per benchmark round and then caches the result to disk [1]_. It cannot\n        be parametrised [2]_.\n    \n    \n        [1] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#setup-and-teardown-functions\n        [2] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#parameterized-benchmarks\n        \"\"\"\n    \n        # Check config file exists\n        assert Path(self.input_config_path).exists()\n    \n        # Instantiate a CellfinderConfig from the input json file\n        # (assumes config is json serializable)\n        with open(self.input_config_path) as cfg:\n            config_dict = json.load(cfg)\n        config = CellfinderConfig(**config_dict)\n    \n        # Download data with pooch\n        _ = pooch.retrieve(\n            url=config.data_url,\n            known_hash=config.data_hash,\n            path=config.install_path,\n            progressbar=True,\n            processor=pooch.Unzip(extract_dir=config.extract_dir_relative),\n        )\n    \n        # Check paths to input data should now exist in config\n        assert Path(config.signal_dir_path).exists()\n        assert Path(config.background_dir_path).exists()",
+        "min_run_count": 2,
+        "name": "cellfinder.TimeReadInputDask.time_read_background_with_dask",
+        "number": 0,
+        "param_names": [],
+        "params": [],
+        "repeat": 0,
+        "rounds": 2,
+        "sample_time": 0.01,
+        "setup_cache_key": "cellfinder:84",
+        "timeout": 600,
+        "type": "time",
+        "unit": "seconds",
+        "version": "bf1a26a9a90c3686fe63ef495a40bda58641061ee504e8aa682554ff5f25506b",
+        "warmup_time": 0.1
+    },
+    "cellfinder.TimeReadInputDask.time_read_signal_with_dask": {
+        "code": "class TimeReadInputDask:\n    def time_read_signal_with_dask(self):\n        read_with_dask(self.cfg.signal_dir_path)\n\nclass TimeBenchmarkPrepGIN:\n    def setup(self):\n        \"\"\"\n        Run the cellfinder workflow setup steps.\n    \n        The command line input arguments are injected as dependencies.\n        \"\"\"\n    \n        # Run setup\n        cfg = setup_cellfinder_workflow(\n            [\n                \"--config\",\n                self.input_config_path,\n            ]\n        )\n    \n        # Save configuration as attribute\n        self.cfg = cfg\n\n    def setup_cache(\n        self,\n    ):\n        \"\"\"\n        Download the input data from the GIN repository to the local\n        directory specified in the default_config.json\n    \n        Notes\n        -----\n        The `setup_cache` method only performs the computations once\n        per benchmark round and then caches the result to disk [1]_. It cannot\n        be parametrised [2]_.\n    \n    \n        [1] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#setup-and-teardown-functions\n        [2] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#parameterized-benchmarks\n        \"\"\"\n    \n        # Check config file exists\n        assert Path(self.input_config_path).exists()\n    \n        # Instantiate a CellfinderConfig from the input json file\n        # (assumes config is json serializable)\n        with open(self.input_config_path) as cfg:\n            config_dict = json.load(cfg)\n        config = CellfinderConfig(**config_dict)\n    \n        # Download data with pooch\n        _ = pooch.retrieve(\n            url=config.data_url,\n            known_hash=config.data_hash,\n            path=config.install_path,\n            progressbar=True,\n            processor=pooch.Unzip(extract_dir=config.extract_dir_relative),\n        )\n    \n        # Check paths to input data should now exist in config\n        assert Path(config.signal_dir_path).exists()\n        assert Path(config.background_dir_path).exists()",
+        "min_run_count": 2,
+        "name": "cellfinder.TimeReadInputDask.time_read_signal_with_dask",
+        "number": 0,
+        "param_names": [],
+        "params": [],
+        "repeat": 0,
+        "rounds": 2,
+        "sample_time": 0.01,
+        "setup_cache_key": "cellfinder:84",
+        "timeout": 600,
+        "type": "time",
+        "unit": "seconds",
+        "version": "d01c253717edd52a017c710e569a36349c463b190e1ac4f0ab786c16cde75bd5",
+        "warmup_time": 0.1
+    },
+    "cellfinder.TimeSaveCells.time_save_cells": {
+        "code": "class TimeSaveCells:\n    def time_save_cells(self):\n        save_cells(self.detected_cells, self.cfg.detected_cells_path)\n\n    def setup(self):\n        # basic setup\n        TimeBenchmarkPrepGIN.setup(self)\n    \n        # add input data as arrays to config\n        self.signal_array = read_with_dask(self.cfg.signal_dir_path)\n        self.background_array = read_with_dask(self.cfg.background_dir_path)\n    \n        # detect cells\n        self.detected_cells = cellfinder_run(\n            self.signal_array, self.background_array, self.cfg.voxel_sizes\n        )\n\nclass TimeBenchmarkPrepGIN:\n    def setup_cache(\n        self,\n    ):\n        \"\"\"\n        Download the input data from the GIN repository to the local\n        directory specified in the default_config.json\n    \n        Notes\n        -----\n        The `setup_cache` method only performs the computations once\n        per benchmark round and then caches the result to disk [1]_. It cannot\n        be parametrised [2]_.\n    \n    \n        [1] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#setup-and-teardown-functions\n        [2] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#parameterized-benchmarks\n        \"\"\"\n    \n        # Check config file exists\n        assert Path(self.input_config_path).exists()\n    \n        # Instantiate a CellfinderConfig from the input json file\n        # (assumes config is json serializable)\n        with open(self.input_config_path) as cfg:\n            config_dict = json.load(cfg)\n        config = CellfinderConfig(**config_dict)\n    \n        # Download data with pooch\n        _ = pooch.retrieve(\n            url=config.data_url,\n            known_hash=config.data_hash,\n            path=config.install_path,\n            progressbar=True,\n            processor=pooch.Unzip(extract_dir=config.extract_dir_relative),\n        )\n    \n        # Check paths to input data should now exist in config\n        assert Path(config.signal_dir_path).exists()\n        assert Path(config.background_dir_path).exists()",
+        "min_run_count": 2,
+        "name": "cellfinder.TimeSaveCells.time_save_cells",
+        "number": 0,
+        "param_names": [],
+        "params": [],
+        "repeat": 0,
+        "rounds": 2,
+        "sample_time": 0.01,
+        "setup_cache_key": "cellfinder:84",
+        "timeout": 600,
+        "type": "time",
+        "unit": "seconds",
+        "version": "7c1d5133e9b37b475e715b17d7d1ffaf8094679919a0ff1b8ac5625936d8af91",
+        "warmup_time": 0.1
+    },
+    "version": 2
+}
diff --git a/brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/d83f97ba-conda-py3.10.json b/brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/d83f97ba-conda-py3.10.json
new file mode 100644
index 00000000..f4009e68
--- /dev/null
+++ b/brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/d83f97ba-conda-py3.10.json
@@ -0,0 +1 @@
+{"commit_hash": "d83f97ba87ce4298084b16e45745cc33834b6a5e", "env_name": "conda-py3.10", "date": 1697216056000, "params": {"arch": "arm64", "cpu": "Apple M1 Pro", "machine": "eduroam-int-dhcp-97-153-159.ucl.ac.uk", "num_cpu": "10", "os": "Darwin 23.0.0", "ram": "34359738368", "python": "3.10"}, "python": "3.10", "requirements": {}, "env_vars": {}, "result_columns": ["result", "params", "version", "started_at", "duration", "stats_ci_99_a", "stats_ci_99_b", "stats_q_25", "stats_q_75", "stats_number", "stats_repeat", "samples", "profile"], "results": {"cellfinder.TimeDetectCells.time_cellfinder_run": [[13.403959749994101], [], "a37269e5549803517d14817f7e9aede2930f6ba97c3faeba7235de092a74f6c8", 1697453708463, 72.497, [-34.622], [61.43], [12.924], [13.884], [1], [2]], "cellfinder.TimeFullWorkflow.time_workflow_from_cellfinder_run": [[13.255898479488678], [], "972079517fbc189a9acc0af554fc3c8382e2add170992dfde452a1b5180a7910", 1697453742279, 68.613, [-26.059], [52.571], [12.863], [13.649], [1], [2]], "cellfinder.TimeReadInputDask.time_read_background_with_dask": [[0.0021500665956409645], [], "bf1a26a9a90c3686fe63ef495a40bda58641061ee504e8aa682554ff5f25506b", 1697453777825, 8.1392, [0.0019665], [0.0026427], [0.0020579], [0.0022618], [5], [10]], "cellfinder.TimeReadInputDask.time_read_signal_with_dask": [[0.002099774999078363], [], "d01c253717edd52a017c710e569a36349c463b190e1ac4f0ab786c16cde75bd5", 1697453781751, 7.5877, [0.0019399], [0.0026163], [0.0020621], [0.0022299], [5], [10]], "cellfinder.TimeSaveCells.time_save_cells": [[0.0007860087889160863], [], "7c1d5133e9b37b475e715b17d7d1ffaf8094679919a0ff1b8ac5625936d8af91", 1697453785631, 67.869, [-0.0078589], [0.009431], [0.00069956], [0.00087246], [19], [2]]}, "durations": {"<setup_cache cellfinder:84>": 194.625663}, "version": 2}
diff --git a/brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/machine.json b/brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/machine.json
new file mode 100644
index 00000000..d08e58cd
--- /dev/null
+++ b/brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/machine.json
@@ -0,0 +1,9 @@
+{
+    "arch": "arm64",
+    "cpu": "Apple M1 Pro",
+    "machine": "eduroam-int-dhcp-97-153-159.ucl.ac.uk",
+    "num_cpu": "10",
+    "os": "Darwin 23.0.0",
+    "ram": "34359738368",
+    "version": 1
+}
diff --git a/workflows/__init__.py b/brainglobe_workflows/__init__.py
similarity index 100%
rename from workflows/__init__.py
rename to brainglobe_workflows/__init__.py
diff --git a/workflows/cellfinder.py b/brainglobe_workflows/cellfinder.py
similarity index 98%
rename from workflows/cellfinder.py
rename to brainglobe_workflows/cellfinder.py
index 5432fe80..ce441d8f 100644
--- a/workflows/cellfinder.py
+++ b/brainglobe_workflows/cellfinder.py
@@ -33,7 +33,10 @@
 from cellfinder_core.tools.IO import read_with_dask
 from cellfinder_core.train.train_yml import depth_type
 
-from workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER, setup_logger
+from brainglobe_workflows.utils import (
+    DEFAULT_JSON_CONFIG_PATH_CELLFINDER,
+    setup_logger,
+)
 
 Pathlike = Union[str, os.PathLike]
 
@@ -149,7 +152,7 @@ def add_signal_and_background_files(
         for running cellfinder.
     """
     # Fetch logger
-    logger = logging.getLogger("workflows.utils")
+    logger = logging.getLogger("brainglobe_workflows.utils")
 
     # Check if input data directories (signal and background) exist locally.
     # If both directories exist, get list of signal and background files
@@ -258,7 +261,7 @@ def setup_workflow(input_config_path: Path) -> CellfinderConfig:
     """
 
     # Fetch logger
-    logger = logging.getLogger("workflows.utils")
+    logger = logging.getLogger("brainglobe_workflows.utils")
 
     # Check config file exists
     assert input_config_path.exists()
diff --git a/workflows/configs/cellfinder.json b/brainglobe_workflows/configs/cellfinder.json
similarity index 100%
rename from workflows/configs/cellfinder.json
rename to brainglobe_workflows/configs/cellfinder.json
diff --git a/workflows/utils.py b/brainglobe_workflows/utils.py
similarity index 100%
rename from workflows/utils.py
rename to brainglobe_workflows/utils.py
diff --git a/tests/test_unit/data/input_data_GIN.json b/tests/data/input_data_GIN.json
similarity index 100%
rename from tests/test_unit/data/input_data_GIN.json
rename to tests/data/input_data_GIN.json
diff --git a/tests/test_unit/data/input_data_locally.json b/tests/data/input_data_locally.json
similarity index 100%
rename from tests/test_unit/data/input_data_locally.json
rename to tests/data/input_data_locally.json
diff --git a/tests/test_unit/data/input_data_missing_background.json b/tests/data/input_data_missing_background.json
similarity index 100%
rename from tests/test_unit/data/input_data_missing_background.json
rename to tests/data/input_data_missing_background.json
diff --git a/tests/test_unit/data/input_data_missing_signal.json b/tests/data/input_data_missing_signal.json
similarity index 100%
rename from tests/test_unit/data/input_data_missing_signal.json
rename to tests/data/input_data_missing_signal.json
diff --git a/tests/test_unit/data/input_data_not_locally_or_GIN.json b/tests/data/input_data_not_locally_or_GIN.json
similarity index 100%
rename from tests/test_unit/data/input_data_not_locally_or_GIN.json
rename to tests/data/input_data_not_locally_or_GIN.json
diff --git a/tests/test_unit/benchmarks/__init__.py b/tests/test_integration/brainglobe_workflows/__init__.py
similarity index 100%
rename from tests/test_unit/benchmarks/__init__.py
rename to tests/test_integration/brainglobe_workflows/__init__.py
diff --git a/tests/test_integration/brainglobe_workflows/test_cellfinder.py b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
new file mode 100644
index 00000000..2dc57175
--- /dev/null
+++ b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
@@ -0,0 +1,15 @@
+# from brainglobe_workflows.cellfinder import setup
+
+
+# def test_setup(input_config_path):
+#     setup(input_config_path)
+
+# check logger exists
+
+# check CellfinderConfig config correct?
+
+
+# def test_run_workflow_from_cellfinder_run
+
+
+# def test_main
diff --git a/tests/test_integration/conftest.py b/tests/test_integration/conftest.py
index c8729b62..72e91c5a 100644
--- a/tests/test_integration/conftest.py
+++ b/tests/test_integration/conftest.py
@@ -5,7 +5,7 @@
 import pooch
 import pytest
 
-from workflows.cellfinder import CellfinderConfig
+from brainglobe_workflows.cellfinder import CellfinderConfig
 
 
 def make_config_dict_fetch_from_local(cellfinder_cache_dir: Path) -> dict:
@@ -176,7 +176,7 @@ def default_json_config_path() -> Path:
     Path
         path to the json file with the default config parameters
     """
-    from workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
+    from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
 
     return DEFAULT_JSON_CONFIG_PATH_CELLFINDER
 
diff --git a/tests/test_integration/test_cellfinder_workflow.py b/tests/test_integration/test_cellfinder_workflow.py
index c0fe19b8..2bf4ef39 100644
--- a/tests/test_integration/test_cellfinder_workflow.py
+++ b/tests/test_integration/test_cellfinder_workflow.py
@@ -3,7 +3,7 @@
 import sys
 from pathlib import Path
 
-from workflows.cellfinder import CellfinderConfig
+from brainglobe_workflows.cellfinder import CellfinderConfig
 
 
 def test_run_with_default_config(tmp_path, default_json_config_path):
diff --git a/tests/test_unit/workflows/__init__.py b/tests/test_unit/brainglobe_benchmarks/__init__.py
similarity index 100%
rename from tests/test_unit/workflows/__init__.py
rename to tests/test_unit/brainglobe_benchmarks/__init__.py
diff --git a/tests/test_unit/brainglobe_workflows/__init__.py b/tests/test_unit/brainglobe_workflows/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_unit/workflows/test_cellfinder.py b/tests/test_unit/brainglobe_workflows/test_cellfinder.py
similarity index 67%
rename from tests/test_unit/workflows/test_cellfinder.py
rename to tests/test_unit/brainglobe_workflows/test_cellfinder.py
index 78db01dd..fd3c5550 100644
--- a/tests/test_unit/workflows/test_cellfinder.py
+++ b/tests/test_unit/brainglobe_workflows/test_cellfinder.py
@@ -5,16 +5,17 @@
 import pooch
 import pytest
 
-from workflows.cellfinder import (
+from brainglobe_workflows.cellfinder import (
     add_signal_and_background_files,
     read_cellfinder_config,
+    setup_workflow,
 )
-from workflows.utils import setup_logger
+from brainglobe_workflows.utils import setup_logger
 
 
 @pytest.fixture()
 def input_configs_dir():
-    return Path(__file__).parents[1] / "data"
+    return Path(__file__).parents[2] / "data"
 
 
 @pytest.fixture(scope="session")
@@ -32,6 +33,18 @@ def cellfinder_GIN_data() -> dict:
     }
 
 
+@pytest.fixture()
+def input_config_default():
+    from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
+
+    return DEFAULT_JSON_CONFIG_PATH_CELLFINDER
+
+
+@pytest.fixture()
+def input_config_fetch_GIN(input_configs_dir):
+    return input_configs_dir / "input_data_GIN.json"
+
+
 @pytest.mark.parametrize(
     "input_config",
     [
@@ -106,8 +119,7 @@ def test_add_signal_and_background_files(
         _description_
     """
     # instantiate our custom logger
-    logger = setup_logger()
-    assert logger.name == "workflows.utils"
+    _ = setup_logger()
 
     # read json as Cellfinder config
     config = read_cellfinder_config(input_configs_dir / input_config)
@@ -160,25 +172,57 @@ def test_add_signal_and_background_files(
     assert out.group() is not None
 
 
-# def test_setup_workflow(input_config_path):
-#     """Test setup steps for the cellfinder workflow are completed
+@pytest.mark.parametrize(
+    "input_config, message",
+    [
+        ("input_config_default", "Using default config file"),
+        ("input_config_fetch_GIN", "Input config read from"),
+    ],
+)
+def test_setup_workflow(
+    input_config, message, monkeypatch, tmp_path, caplog, request
+):
+    """Test setup steps for the cellfinder workflow are completed
+
+    These setup steps include:
+    - instantiating a CellfinderConfig object with the required parameters,
+    - add signal and background files to config if these do not exist
+    - creating a timestamped directory for the output of the workflow if
+    it doesn't exist and adding its path to the config
+    """
+
+    # setup logger
+    _ = setup_logger()
+
+    # monkeypatch to change current directory to
+    # pytest temporary directory
+    # (cellfinder cache directory is created in cwd)
+    monkeypatch.chdir(tmp_path)
+
+    # setup workflow
+    config = setup_workflow(request.getfixturevalue(input_config))
 
-#     These setup steps include:
-#     - instantiating a CellfinderConfig object with the required parameters,
-#     - add signal and background files to config if these do not exist
-#     - creating a timestamped directory for the output of the workflow if
-#     it doesn't exist and adding its path to the config
-#     """
+    # check logs
+    assert message in caplog.text
 
+    # check all signal files exist
+    assert all([Path(f).is_file() for f in config.list_signal_files])
 
-#     config = setup_workflow(input_config_path)
+    # check all background files exist
+    assert all([Path(f).is_file() for f in config.list_background_files])
 
-#     assert config.list_signal_files # check all files exist
-#     assert config.list_background_files  # check all files exist
-#     assert config.output_path # check path exists
-#     assert config.output_path # check timestamped format:
-# install_path /  str(config.output_path_basename_relative)
-# + timestamp_formatted
-# #---should be timestamped with this format strftime("%Y%m%d_%H%M%S")
-#     assert config.detected_cells_path # check this field is defined
-# # should be config.output_path / config.detected_cells_filename
+    # check output directory exists
+    assert Path(config.output_path).resolve().is_dir()
+
+    # check output directory name has correct format
+    out = re.fullmatch(
+        config.output_path_basename_relative + "\\d{8}_\\d{6}$",
+        Path(config.output_path).stem,
+    )
+    assert out.group() is not None
+
+    # check output file path
+    assert (
+        config.detected_cells_path
+        == config.output_path / config.detected_cells_filename
+    )
diff --git a/tests/test_unit/workflows/test_utils.py b/tests/test_unit/brainglobe_workflows/test_utils.py
similarity index 84%
rename from tests/test_unit/workflows/test_utils.py
rename to tests/test_unit/brainglobe_workflows/test_utils.py
index 2b29e150..92d78d20 100644
--- a/tests/test_unit/workflows/test_utils.py
+++ b/tests/test_unit/brainglobe_workflows/test_utils.py
@@ -1,6 +1,6 @@
 import logging
 
-from workflows.utils import setup_logger
+from brainglobe_workflows.utils import setup_logger
 
 
 def test_setup_logger():

From edce94c4ba1576420059e9138862ccce43707f65 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 20:10:57 +0000
Subject: [PATCH 18/54] remove old workflow tests and conftest

---
 .../test_cellfinder_benchmarks.py             |   0
 tests/test_integration/conftest.py            | 288 ------------------
 .../test_cellfinder_workflow.py               | 208 -------------
 3 files changed, 496 deletions(-)
 rename tests/test_integration/{ => brainglobe_benchmarks}/test_cellfinder_benchmarks.py (100%)
 delete mode 100644 tests/test_integration/conftest.py
 delete mode 100644 tests/test_integration/test_cellfinder_workflow.py

diff --git a/tests/test_integration/test_cellfinder_benchmarks.py b/tests/test_integration/brainglobe_benchmarks/test_cellfinder_benchmarks.py
similarity index 100%
rename from tests/test_integration/test_cellfinder_benchmarks.py
rename to tests/test_integration/brainglobe_benchmarks/test_cellfinder_benchmarks.py
diff --git a/tests/test_integration/conftest.py b/tests/test_integration/conftest.py
deleted file mode 100644
index 72e91c5a..00000000
--- a/tests/test_integration/conftest.py
+++ /dev/null
@@ -1,288 +0,0 @@
-import json
-from pathlib import Path
-from typing import Any
-
-import pooch
-import pytest
-
-from brainglobe_workflows.cellfinder import CellfinderConfig
-
-
-def make_config_dict_fetch_from_local(cellfinder_cache_dir: Path) -> dict:
-    """Generate a config dictionary with the required parameters
-    for the workflow
-
-    The input data is assumed to be locally at cellfinder_cache_dir.
-    The results are saved in a timestamped output subdirectory under
-    cellfinder_cache_dir
-
-    Parameters
-    ----------
-    cellfinder_cache_dir : Path
-        Path to the directory where the downloaded input data will be unzipped,
-        and the output will be saved
-
-    Returns
-    -------
-    dict
-        dictionary with the required parameters for the workflow
-    """
-    return {
-        "install_path": cellfinder_cache_dir,
-        "data_dir_relative": "cellfinder_test_data",  # relative path
-        "signal_subdir": "signal",
-        "background_subdir": "background",
-        "output_path_basename_relative": "cellfinder_output_",
-        "detected_cells_filename": "detected_cells.xml",
-        "voxel_sizes": [5, 2, 2],  # microns
-        "start_plane": 0,
-        "end_plane": -1,
-        "trained_model": None,  # if None, it will use a default model
-        "model_weights": None,
-        "model": "resnet50_tv",
-        "batch_size": 32,
-        "n_free_cpus": 2,
-        "network_voxel_sizes": [5, 1, 1],
-        "soma_diameter": 16,
-        "ball_xy_size": 6,
-        "ball_z_size": 15,
-        "ball_overlap_fraction": 0.6,
-        "log_sigma_size": 0.2,
-        "n_sds_above_mean_thresh": 10,
-        "soma_spread_factor": 1.4,
-        "max_cluster_size": 100000,
-        "cube_width": 50,
-        "cube_height": 50,
-        "cube_depth": 20,
-        "network_depth": "50",
-    }
-
-
-def make_config_dict_fetch_from_GIN(
-    cellfinder_cache_dir: Path,
-    data_url: str,
-    data_hash: str,
-) -> dict:
-    """Generate a config dictionary with the required parameters
-    for the workflow
-
-    The input data is fetched from GIN and downloaded to cellfinder_cache_dir.
-    The results are also saved in a timestamped output subdirectory under
-    cellfinder_cache_dir
-
-    Parameters
-    ----------
-    cellfinder_cache_dir : Path
-        Path to the directory where the downloaded input data will be unzipped,
-        and the output will be saved
-    data_url: str
-        URL to the GIN repository with the data to download
-    data_hash: str
-        Hash of the data to download
-
-    Returns
-    -------
-    dict
-        dictionary with the required parameters for the workflow
-    """
-
-    config = make_config_dict_fetch_from_local(cellfinder_cache_dir)
-    config["data_url"] = data_url
-    config["data_hash"] = data_hash
-
-    return config
-
-
-def prep_json(obj: Any) -> Any:
-    """
-    Returns a JSON encodable version of the input object.
-
-    It uses the JSON default encoder for all objects
-    except those of type `Path`.
-
-
-    Parameters
-    ----------
-    obj : Any
-        _description_
-
-    Returns
-    -------
-    Any
-        JSON serializable version of input object
-    """
-    if isinstance(obj, Path):
-        return str(obj)
-    else:
-        json_decoder = json.JSONEncoder()
-        return json_decoder.default(obj)
-
-
-@pytest.fixture(autouse=True)
-def cellfinder_cache_dir(tmp_path: Path) -> Path:
-    """Create a .cellfinder_workflows directory
-    under a temporary pytest directory and return
-    its path.
-
-    The temporary directory is available via pytest's tmp_path
-    fixture. A new temporary directory is created every function call
-    (i.e., scope="function")
-
-    Parameters
-    ----------
-    tmp_path : Path
-        path to pytest-generated temporary directory
-
-    Returns
-    -------
-    Path
-        path to the created cellfinder_workflows cache directory
-    """
-
-    return Path(tmp_path) / ".cellfinder_workflows"
-
-
-@pytest.fixture(scope="session")
-def data_url() -> str:
-    """Return the URL to the GIN repository with the input data
-
-    Returns
-    -------
-    str
-        URL to the GIN repository with the input data
-    """
-    return "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip"
-
-
-@pytest.fixture(scope="session")
-def data_hash() -> str:
-    """Return the hash of the GIN input data
-
-    Returns
-    -------
-    str
-        Hash to the GIN input data
-    """
-    return "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914"
-
-
-@pytest.fixture(scope="session")
-def default_json_config_path() -> Path:
-    """Return the path to the json file
-    with the default config parameters for cellfinder
-
-    Returns
-    -------
-    Path
-        path to the json file with the default config parameters
-    """
-    from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
-
-    return DEFAULT_JSON_CONFIG_PATH_CELLFINDER
-
-
-@pytest.fixture()
-def path_to_config_fetch_GIN(
-    tmp_path: Path, cellfinder_cache_dir: Path, data_url: str, data_hash: str
-) -> Path:
-    """Create an input config that fetches data from GIN and
-    return its path
-
-    Parameters
-    ----------
-    tmp_path : Path
-        path to a fresh pytest-generated temporary directory. The
-        generated config is saved here.
-
-    cellfinder_cache_dir : Path
-        path to the cellfinder cache directory, where the paths
-        in the config should point to.
-
-    data_url: str
-        URL to the GIN repository with the input data
-
-    data_hash: str
-        hash to the GIN input data
-
-    Returns
-    -------
-    input_config_path : Path
-        path to config file that fetches data from GIN
-    """
-    # create config dict
-    config_dict = make_config_dict_fetch_from_GIN(
-        cellfinder_cache_dir, data_url, data_hash
-    )
-
-    # create a temp json file to dump config data
-    input_config_path = (
-        tmp_path / "input_config.json"
-    )  # save it in a temp dir separate from cellfinder_cache_dir
-
-    # save config data to json file
-    with open(input_config_path, "w") as js:
-        json.dump(config_dict, js, default=prep_json)
-
-    # check json file exists
-    assert Path(input_config_path).is_file()
-
-    return input_config_path
-
-
-@pytest.fixture()
-def path_to_config_fetch_local(
-    tmp_path: Path, cellfinder_cache_dir: Path, data_url: str, data_hash: str
-) -> Path:
-    """Create an input config that points to local data and
-    return its path.
-
-    The local data is downloaded from GIN, but no reference
-    to the GIN repository is included in the config.
-
-    Parameters
-    ----------
-    tmp_path : Path
-        path to a fresh pytest-generated temporary directory. The
-        generated config is saved here.
-
-    cellfinder_cache_dir : Path
-        path to the cellfinder cache directory, where the paths
-        in the config should point to.
-
-    data_url: str
-        URL to the GIN repository with the input data
-
-    data_hash: str
-        hash to the GIN input data
-
-    Returns
-    -------
-    path_to_config_fetch_GIN : Path
-        path to a config file that fetches data from GIN
-    """
-
-    # instantiate basic config (assumes data is local)
-    config_dict = make_config_dict_fetch_from_local(cellfinder_cache_dir)
-    config = CellfinderConfig(**config_dict)
-
-    # download GIN data to specified local directory
-    pooch.retrieve(
-        url=data_url,
-        known_hash=data_hash,
-        path=config.install_path,  # path to download zip to
-        progressbar=True,
-        processor=pooch.Unzip(
-            extract_dir=config.data_dir_relative
-            # path to unzipped dir, *relative*  to 'path'
-        ),
-    )
-
-    # save config to json
-    input_config_path = tmp_path / "input_config.json"
-    with open(input_config_path, "w") as js:
-        json.dump(config_dict, js, default=prep_json)
-
-    # check json file exists
-    assert Path(input_config_path).is_file()
-
-    return input_config_path
diff --git a/tests/test_integration/test_cellfinder_workflow.py b/tests/test_integration/test_cellfinder_workflow.py
deleted file mode 100644
index 2bf4ef39..00000000
--- a/tests/test_integration/test_cellfinder_workflow.py
+++ /dev/null
@@ -1,208 +0,0 @@
-import json
-import subprocess
-import sys
-from pathlib import Path
-
-from brainglobe_workflows.cellfinder import CellfinderConfig
-
-
-def test_run_with_default_config(tmp_path, default_json_config_path):
-    """Test workflow run with no command line arguments
-
-    If no command line arguments are provided, the default
-    config at brainglobe_workflows/cellfinder/default_config.json
-    should be used.
-
-    After the workflow is run we check that:
-    - there are no errors (via returncode),
-    - the logs reflect the default config file was used, and
-    - a single output directory exists with the expected
-      output file inside it
-
-    Parameters
-    ----------
-    tmp_path : Path
-        path to a pytest-generated temporary directory.
-    """
-
-    # run workflow with no CLI arguments,
-    # with cwd=tmp_path
-    subprocess_output = subprocess.run(
-        [
-            sys.executable,
-            Path(__file__).resolve().parents[2]
-            / "workflows"
-            / "cellfinder.py",
-        ],
-        cwd=tmp_path,  # -------------------
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        text=True,
-        encoding="utf-8",
-    )
-
-    # check returncode
-    assert subprocess_output.returncode == 0
-
-    # check logs
-    assert "Using default config file" in subprocess_output.stdout
-
-    # Check one output directory exists and has expected
-    # output file inside it
-    assert_outputs(default_json_config_path, tmp_path)
-
-
-def test_run_with_GIN_data(
-    path_to_config_fetch_GIN,
-):
-    """Test workflow runs when passing a config that fetches data
-    from the GIN repository
-
-    After the workflow is run we check that:
-    - there are no errors (via returncode),
-    - the logs reflect the input config file was used,
-    - the logs reflect the data was downloaded from GIN, and
-    - a single output directory exists with the expected
-      output file inside it
-
-    Parameters
-    ----------
-    tmp_path : Path
-        path to a pytest-generated temporary directory.
-    """
-    # run workflow with CLI and capture log
-    subprocess_output = subprocess.run(
-        [
-            sys.executable,
-            Path(__file__).resolve().parents[2]
-            / "workflows"
-            / "cellfinder.py",
-            "--config",
-            str(path_to_config_fetch_GIN),
-        ],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        text=True,
-        encoding="utf-8",
-    )
-
-    # check returncode
-    assert subprocess_output.returncode == 0
-
-    # check logs
-    assert (
-        f"Input config read from {str(path_to_config_fetch_GIN)}"
-        in subprocess_output.stdout
-    )
-    assert (
-        "Fetching input data from the provided GIN repository"
-        in subprocess_output.stdout
-    )
-
-    # check one output directory exists and
-    # has expected output file inside it
-    assert_outputs(path_to_config_fetch_GIN)
-
-
-def test_run_with_local_data(
-    path_to_config_fetch_local,
-):
-    """Test workflow runs when passing a config that uses
-    local data
-
-    After the workflow is run we check that:
-    - there are no errors (via returncode),
-    - the logs reflect the input config file was used,
-    - the logs reflect the data was found locally, and
-    - a single output directory exists with the expected
-      output file inside it
-
-    Parameters
-    ----------
-    tmp_path : Path
-        path to a pytest-generated temporary directory.
-    """
-
-    # run workflow with CLI
-    subprocess_output = subprocess.run(
-        [
-            sys.executable,
-            Path(__file__).resolve().parents[2]
-            / "workflows"
-            / "cellfinder.py",
-            "--config",
-            str(path_to_config_fetch_local),
-        ],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        text=True,
-        encoding="utf-8",
-    )
-
-    # check returncode
-    assert subprocess_output.returncode == 0
-
-    # check logs
-    assert (
-        f"Input config read from {str(path_to_config_fetch_local)}"
-        in subprocess_output.stdout
-    )
-    assert (
-        "Fetching input data from the local directories"
-        in subprocess_output.stdout
-    )
-
-    # check one output directory exists and
-    # has expected output file inside it
-    assert_outputs(path_to_config_fetch_local)
-
-
-def assert_outputs(path_to_config, parent_dir_of_install_path=""):
-    """Helper function to determine whether the output is
-    as expected.
-
-    It checks that:
-     - a single output directory exists, and
-     - the expected output file exists inside it
-
-    Note that config.output_path is only defined after the workflow
-    setup is run, because its name is timestamped. Therefore,
-    we search for an output directory based on config.output_path_basename.
-
-    Parameters
-    ----------
-    path_to_config : Path
-        path to the input config used to generate the
-        output.
-
-    parent_dir_of_install_path : str, optional
-        If the install_path in the input config is relative to the
-        directory the script is launched from (as is the case in the
-        default_config.json file), the absolute path to its parent_dir
-        must be specified here. If the paths to install_path is
-        absolute, this input is not required. By default "".
-    """
-
-    # load input config
-    with open(path_to_config) as config:
-        config_dict = json.load(config)
-    config = CellfinderConfig(**config_dict)
-
-    # check one output directory exists and
-    # it has expected output file inside it
-    output_path_without_timestamp = (
-        Path(parent_dir_of_install_path)
-        / config.install_path
-        / config.output_path_basename_relative
-    )
-    output_path_timestamped = [
-        x
-        for x in output_path_without_timestamp.parent.glob("*")
-        if x.is_dir() and x.name.startswith(output_path_without_timestamp.name)
-    ]
-
-    assert len(output_path_timestamped) == 1
-    assert (output_path_timestamped[0]).exists()
-    assert (
-        output_path_timestamped[0] / config.detected_cells_filename
-    ).is_file()

From 0bab20f5c09be5cc52199a8afd81424e56a0a8e1 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 20:11:32 +0000
Subject: [PATCH 19/54] take logger name from module

---
 brainglobe_workflows/cellfinder.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/brainglobe_workflows/cellfinder.py b/brainglobe_workflows/cellfinder.py
index ce441d8f..6bcefda3 100644
--- a/brainglobe_workflows/cellfinder.py
+++ b/brainglobe_workflows/cellfinder.py
@@ -37,6 +37,7 @@
     DEFAULT_JSON_CONFIG_PATH_CELLFINDER,
     setup_logger,
 )
+from brainglobe_workflows.utils import __name__ as LOGGER_NAME
 
 Pathlike = Union[str, os.PathLike]
 
@@ -101,7 +102,7 @@ class CellfinderConfig:
     background_dir_path: Pathlike = ""
 
 
-def read_cellfinder_config(input_config_path):
+def read_cellfinder_config(input_config_path: Path):
     """Instantiate a CellfinderConfig from the input json file
     (assumes config is json serializable)
 
@@ -152,7 +153,7 @@ def add_signal_and_background_files(
         for running cellfinder.
     """
     # Fetch logger
-    logger = logging.getLogger("brainglobe_workflows.utils")
+    logger = logging.getLogger(LOGGER_NAME)
 
     # Check if input data directories (signal and background) exist locally.
     # If both directories exist, get list of signal and background files
@@ -261,7 +262,7 @@ def setup_workflow(input_config_path: Path) -> CellfinderConfig:
     """
 
     # Fetch logger
-    logger = logging.getLogger("brainglobe_workflows.utils")
+    logger = logging.getLogger(LOGGER_NAME)
 
     # Check config file exists
     assert input_config_path.exists()
@@ -310,7 +311,7 @@ def setup_workflow(input_config_path: Path) -> CellfinderConfig:
     return config
 
 
-def setup(input_config_path) -> CellfinderConfig:
+def setup(input_config_path: str) -> CellfinderConfig:
     # setup logger
     _ = setup_logger()
 

From b4d585f6b90edbe422dc40cf4583ffcc3ce187b6 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 20:12:29 +0000
Subject: [PATCH 20/54] add integration tests and conftest for fixtures shared
 between unit and integration

---
 tests/conftest.py                             | 15 ++++++
 .../brainglobe_workflows/test_cellfinder.py   | 47 ++++++++++++++++---
 .../brainglobe_workflows/test_cellfinder.py   |  9 +---
 3 files changed, 57 insertions(+), 14 deletions(-)
 create mode 100644 tests/conftest.py

diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..d4251a31
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,15 @@
+import pytest
+
+
+@pytest.fixture()
+def default_input_config_cellfinder():
+    from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
+
+    return DEFAULT_JSON_CONFIG_PATH_CELLFINDER
+
+
+@pytest.fixture()
+def custom_logger_name():
+    from brainglobe_workflows.utils import __name__ as LOGGER_NAME
+
+    return LOGGER_NAME
diff --git a/tests/test_integration/brainglobe_workflows/test_cellfinder.py b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
index 2dc57175..4d50afbd 100644
--- a/tests/test_integration/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
@@ -1,15 +1,50 @@
-# from brainglobe_workflows.cellfinder import setup
+import logging
 
+from brainglobe_workflows.cellfinder import (
+    CellfinderConfig,
+    run_workflow_from_cellfinder_run,
+)
+from brainglobe_workflows.cellfinder import (
+    setup as setup_all,  # why do I need setup_all??
+)
 
-# def test_setup(input_config_path):
-#     setup(input_config_path)
 
-# check logger exists
+def test_setup(
+    default_input_config_cellfinder, custom_logger_name, monkeypatch, tmp_path
+):
+    # monkeypatch to change current directory to
+    # pytest temporary directory
+    # (cellfinder cache directory is created in cwd)
+    monkeypatch.chdir(tmp_path)
 
-# check CellfinderConfig config correct?
+    # run setup on default configuration
+    cfg = setup_all(default_input_config_cellfinder)
 
+    # check logger exists
+    logger = logging.getLogger(custom_logger_name)
+    assert logger.level == logging.DEBUG
+    assert logger.hasHandlers()
 
-# def test_run_workflow_from_cellfinder_run
+    # check config is CellfinderConfig
+    assert isinstance(cfg, CellfinderConfig)
+
+
+def test_run_workflow_from_cellfinder_run(
+    default_input_config_cellfinder, monkeypatch, tmp_path
+):
+    # monkeypatch to change current directory to
+    # pytest temporary directory
+    # (cellfinder cache directory is created in cwd)
+    monkeypatch.chdir(tmp_path)
+
+    # run setup
+    cfg = setup_all(default_input_config_cellfinder)
+
+    # run workflow
+    run_workflow_from_cellfinder_run(cfg)
+
+    # check output files are those expected?
+    assert (cfg.detected_cells_path).is_file()
 
 
 # def test_main
diff --git a/tests/test_unit/brainglobe_workflows/test_cellfinder.py b/tests/test_unit/brainglobe_workflows/test_cellfinder.py
index fd3c5550..698447bb 100644
--- a/tests/test_unit/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_unit/brainglobe_workflows/test_cellfinder.py
@@ -33,13 +33,6 @@ def cellfinder_GIN_data() -> dict:
     }
 
 
-@pytest.fixture()
-def input_config_default():
-    from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
-
-    return DEFAULT_JSON_CONFIG_PATH_CELLFINDER
-
-
 @pytest.fixture()
 def input_config_fetch_GIN(input_configs_dir):
     return input_configs_dir / "input_data_GIN.json"
@@ -175,7 +168,7 @@ def test_add_signal_and_background_files(
 @pytest.mark.parametrize(
     "input_config, message",
     [
-        ("input_config_default", "Using default config file"),
+        ("default_input_config_cellfinder", "Using default config file"),
         ("input_config_fetch_GIN", "Input config read from"),
     ],
 )

From 05335485f2a9b497dfbd304cc0d142389b4b30d5 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 20:12:49 +0000
Subject: [PATCH 21/54] change logger name source in logger test

---
 tests/test_unit/brainglobe_workflows/test_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_unit/brainglobe_workflows/test_utils.py b/tests/test_unit/brainglobe_workflows/test_utils.py
index 92d78d20..5b7de0e8 100644
--- a/tests/test_unit/brainglobe_workflows/test_utils.py
+++ b/tests/test_unit/brainglobe_workflows/test_utils.py
@@ -3,11 +3,11 @@
 from brainglobe_workflows.utils import setup_logger
 
 
-def test_setup_logger():
+def test_setup_logger(custom_logger_name):
     logger = setup_logger()
 
     assert logger.level == logging.DEBUG
-    assert logger.name == "workflows.utils"
+    assert logger.name == custom_logger_name
     assert logger.hasHandlers()
     assert len(logger.handlers) == 1
     assert logger.handlers[0].name == "console_handler"

From 2dda25391d235a561ba8cc1d2547a9e318186c4a Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 20:14:31 +0000
Subject: [PATCH 22/54] rename tests for benchmarks

---
 .../{test_cellfinder_benchmarks.py => test_cellfinder.py}         | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/test_integration/brainglobe_benchmarks/{test_cellfinder_benchmarks.py => test_cellfinder.py} (100%)

diff --git a/tests/test_integration/brainglobe_benchmarks/test_cellfinder_benchmarks.py b/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
similarity index 100%
rename from tests/test_integration/brainglobe_benchmarks/test_cellfinder_benchmarks.py
rename to tests/test_integration/brainglobe_benchmarks/test_cellfinder.py

From cc47a0f9d4b7f6d1c41a0a99dfc3d6dc39f104b7 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 20:30:26 +0000
Subject: [PATCH 23/54] add entrypoint and preliminary tests (WIP)

---
 brainglobe_workflows/cellfinder.py            |  8 ++-
 pyproject.toml                                |  3 +
 .../brainglobe_workflows/test_cellfinder.py   | 64 ++++++++++++++++++-
 3 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/brainglobe_workflows/cellfinder.py b/brainglobe_workflows/cellfinder.py
index 6bcefda3..59677f2b 100644
--- a/brainglobe_workflows/cellfinder.py
+++ b/brainglobe_workflows/cellfinder.py
@@ -363,7 +363,13 @@ def main(input_config_path=DEFAULT_JSON_CONFIG_PATH_CELLFINDER):
     # run workflow
     run_workflow_from_cellfinder_run(cfg)  # only this will be benchmarked
 
+    return cfg
+
+
+def app_wrapper():
+    typer.run(main)
+
 
 if __name__ == "__main__":
     # run setup and workflow
-    typer.run(main)
+    app_wrapper()
diff --git a/pyproject.toml b/pyproject.toml
index 473f949d..3bb1656b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,6 +44,9 @@ dev = [
   "setuptools_scm",
 ]
 
+[project.scripts]
+cellfinder-workflow = "brainglobe_workflows.cellfinder:app_wrapper"
+
 [build-system]
 requires = [
     "setuptools>=45",
diff --git a/tests/test_integration/brainglobe_workflows/test_cellfinder.py b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
index 4d50afbd..34e74b53 100644
--- a/tests/test_integration/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
@@ -1,11 +1,15 @@
 import logging
+import subprocess
+import sys
+from pathlib import Path
 
 from brainglobe_workflows.cellfinder import (
     CellfinderConfig,
+    main,
     run_workflow_from_cellfinder_run,
 )
 from brainglobe_workflows.cellfinder import (
-    setup as setup_all,  # why do I need setup_all??
+    setup as setup_all,  # --->why do I need setup_all??
 )
 
 
@@ -37,7 +41,7 @@ def test_run_workflow_from_cellfinder_run(
     # (cellfinder cache directory is created in cwd)
     monkeypatch.chdir(tmp_path)
 
-    # run setup
+    # run setup --- have as a fixture for these two test instead?
     cfg = setup_all(default_input_config_cellfinder)
 
     # run workflow
@@ -47,4 +51,58 @@ def test_run_workflow_from_cellfinder_run(
     assert (cfg.detected_cells_path).is_file()
 
 
-# def test_main
+# TODO: test main with default and custom json?
+def test_main(monkeypatch, tmp_path):
+    # monkeypatch to change current directory to
+    # pytest temporary directory
+    # (cellfinder cache directory is created in cwd)
+    monkeypatch.chdir(tmp_path)
+
+    cfg = main()
+
+    # check output files are those expected?
+    assert (cfg.detected_cells_path).is_file()
+
+
+# TODO: test main CLI with default and custom json?
+def test_app_wrapper(monkeypatch, tmp_path):
+    # monkeypatch to change current directory to
+    # pytest temporary directory
+    # (cellfinder cache directory is created in cwd)
+    monkeypatch.chdir(tmp_path)
+
+    # run workflow with no CLI arguments,
+    subprocess_output = subprocess.run(
+        [
+            sys.executable,
+            Path(__file__).resolve().parents[2]
+            / "brainglobe_workflows"
+            / "cellfinder.py",
+        ],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        encoding="utf-8",
+    )
+
+    # check returncode
+    assert subprocess_output.returncode == 0
+
+
+def test_main_entry_point(monkeypatch, tmp_path):
+    #  monkeypatch to change current directory to
+    # pytest temporary directory
+    # (cellfinder cache directory is created in cwd)
+    monkeypatch.chdir(tmp_path)
+
+    # run workflow with no CLI arguments,
+    subprocess_output = subprocess.run(
+        ["cellfinder-workflow"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        encoding="utf-8",
+    )
+
+    # check returncode
+    assert subprocess_output.returncode == 0

From 36a78cc7ddd2a7ce54565f9cd8ff84a6f4034413 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 20:31:26 +0000
Subject: [PATCH 24/54] remove benchmarks results

---
 brainglobe_benchmarks/results/benchmarks.json | 88 -------------------
 .../d83f97ba-conda-py3.10.json                |  1 -
 .../machine.json                              |  9 --
 3 files changed, 98 deletions(-)
 delete mode 100644 brainglobe_benchmarks/results/benchmarks.json
 delete mode 100644 brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/d83f97ba-conda-py3.10.json
 delete mode 100644 brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/machine.json

diff --git a/brainglobe_benchmarks/results/benchmarks.json b/brainglobe_benchmarks/results/benchmarks.json
deleted file mode 100644
index 8fe97cef..00000000
--- a/brainglobe_benchmarks/results/benchmarks.json
+++ /dev/null
@@ -1,88 +0,0 @@
-{
-    "cellfinder.TimeDetectCells.time_cellfinder_run": {
-        "code": "class TimeDetectCells:\n    def time_cellfinder_run(self):\n        cellfinder_run(\n            self.signal_array, self.background_array, self.cfg.voxel_sizes\n        )\n\n    def setup(self):\n        # basic setup\n        TimeBenchmarkPrepGIN.setup(self)\n    \n        # add input data as arrays to config\n        self.signal_array = read_with_dask(self.cfg.signal_dir_path)\n        self.background_array = read_with_dask(self.cfg.background_dir_path)\n\nclass TimeBenchmarkPrepGIN:\n    def setup_cache(\n        self,\n    ):\n        \"\"\"\n        Download the input data from the GIN repository to the local\n        directory specified in the default_config.json\n    \n        Notes\n        -----\n        The `setup_cache` method only performs the computations once\n        per benchmark round and then caches the result to disk [1]_. It cannot\n        be parametrised [2]_.\n    \n    \n        [1] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#setup-and-teardown-functions\n        [2] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#parameterized-benchmarks\n        \"\"\"\n    \n        # Check config file exists\n        assert Path(self.input_config_path).exists()\n    \n        # Instantiate a CellfinderConfig from the input json file\n        # (assumes config is json serializable)\n        with open(self.input_config_path) as cfg:\n            config_dict = json.load(cfg)\n        config = CellfinderConfig(**config_dict)\n    \n        # Download data with pooch\n        _ = pooch.retrieve(\n            url=config.data_url,\n            known_hash=config.data_hash,\n            path=config.install_path,\n            progressbar=True,\n            processor=pooch.Unzip(extract_dir=config.extract_dir_relative),\n        )\n    \n        # Check paths to input data should now exist in config\n        assert Path(config.signal_dir_path).exists()\n        assert Path(config.background_dir_path).exists()",
-        "min_run_count": 2,
-        "name": "cellfinder.TimeDetectCells.time_cellfinder_run",
-        "number": 0,
-        "param_names": [],
-        "params": [],
-        "repeat": 0,
-        "rounds": 2,
-        "sample_time": 0.01,
-        "setup_cache_key": "cellfinder:84",
-        "timeout": 600,
-        "type": "time",
-        "unit": "seconds",
-        "version": "a37269e5549803517d14817f7e9aede2930f6ba97c3faeba7235de092a74f6c8",
-        "warmup_time": 0.1
-    },
-    "cellfinder.TimeFullWorkflow.time_workflow_from_cellfinder_run": {
-        "code": "class TimeFullWorkflow:\n    def time_workflow_from_cellfinder_run(self):\n        run_workflow_from_cellfinder_run(self.cfg)\n\nclass TimeBenchmarkPrepGIN:\n    def setup(self):\n        \"\"\"\n        Run the cellfinder workflow setup steps.\n    \n        The command line input arguments are injected as dependencies.\n        \"\"\"\n    \n        # Run setup\n        cfg = setup_cellfinder_workflow(\n            [\n                \"--config\",\n                self.input_config_path,\n            ]\n        )\n    \n        # Save configuration as attribute\n        self.cfg = cfg\n\n    def setup_cache(\n        self,\n    ):\n        \"\"\"\n        Download the input data from the GIN repository to the local\n        directory specified in the default_config.json\n    \n        Notes\n        -----\n        The `setup_cache` method only performs the computations once\n        per benchmark round and then caches the result to disk [1]_. It cannot\n        be parametrised [2]_.\n    \n    \n        [1] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#setup-and-teardown-functions\n        [2] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#parameterized-benchmarks\n        \"\"\"\n    \n        # Check config file exists\n        assert Path(self.input_config_path).exists()\n    \n        # Instantiate a CellfinderConfig from the input json file\n        # (assumes config is json serializable)\n        with open(self.input_config_path) as cfg:\n            config_dict = json.load(cfg)\n        config = CellfinderConfig(**config_dict)\n    \n        # Download data with pooch\n        _ = pooch.retrieve(\n            url=config.data_url,\n            known_hash=config.data_hash,\n            path=config.install_path,\n            progressbar=True,\n            processor=pooch.Unzip(extract_dir=config.extract_dir_relative),\n        )\n    \n        # Check paths to input data should now exist in config\n        assert Path(config.signal_dir_path).exists()\n        assert Path(config.background_dir_path).exists()",
-        "min_run_count": 2,
-        "name": "cellfinder.TimeFullWorkflow.time_workflow_from_cellfinder_run",
-        "number": 0,
-        "param_names": [],
-        "params": [],
-        "repeat": 0,
-        "rounds": 2,
-        "sample_time": 0.01,
-        "setup_cache_key": "cellfinder:84",
-        "timeout": 600,
-        "type": "time",
-        "unit": "seconds",
-        "version": "972079517fbc189a9acc0af554fc3c8382e2add170992dfde452a1b5180a7910",
-        "warmup_time": 0.1
-    },
-    "cellfinder.TimeReadInputDask.time_read_background_with_dask": {
-        "code": "class TimeReadInputDask:\n    def time_read_background_with_dask(self):\n        read_with_dask(self.cfg.background_dir_path)\n\nclass TimeBenchmarkPrepGIN:\n    def setup(self):\n        \"\"\"\n        Run the cellfinder workflow setup steps.\n    \n        The command line input arguments are injected as dependencies.\n        \"\"\"\n    \n        # Run setup\n        cfg = setup_cellfinder_workflow(\n            [\n                \"--config\",\n                self.input_config_path,\n            ]\n        )\n    \n        # Save configuration as attribute\n        self.cfg = cfg\n\n    def setup_cache(\n        self,\n    ):\n        \"\"\"\n        Download the input data from the GIN repository to the local\n        directory specified in the default_config.json\n    \n        Notes\n        -----\n        The `setup_cache` method only performs the computations once\n        per benchmark round and then caches the result to disk [1]_. It cannot\n        be parametrised [2]_.\n    \n    \n        [1] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#setup-and-teardown-functions\n        [2] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#parameterized-benchmarks\n        \"\"\"\n    \n        # Check config file exists\n        assert Path(self.input_config_path).exists()\n    \n        # Instantiate a CellfinderConfig from the input json file\n        # (assumes config is json serializable)\n        with open(self.input_config_path) as cfg:\n            config_dict = json.load(cfg)\n        config = CellfinderConfig(**config_dict)\n    \n        # Download data with pooch\n        _ = pooch.retrieve(\n            url=config.data_url,\n            known_hash=config.data_hash,\n            path=config.install_path,\n            progressbar=True,\n            processor=pooch.Unzip(extract_dir=config.extract_dir_relative),\n        )\n    \n        # Check paths to input data should now exist in config\n        assert Path(config.signal_dir_path).exists()\n        assert Path(config.background_dir_path).exists()",
-        "min_run_count": 2,
-        "name": "cellfinder.TimeReadInputDask.time_read_background_with_dask",
-        "number": 0,
-        "param_names": [],
-        "params": [],
-        "repeat": 0,
-        "rounds": 2,
-        "sample_time": 0.01,
-        "setup_cache_key": "cellfinder:84",
-        "timeout": 600,
-        "type": "time",
-        "unit": "seconds",
-        "version": "bf1a26a9a90c3686fe63ef495a40bda58641061ee504e8aa682554ff5f25506b",
-        "warmup_time": 0.1
-    },
-    "cellfinder.TimeReadInputDask.time_read_signal_with_dask": {
-        "code": "class TimeReadInputDask:\n    def time_read_signal_with_dask(self):\n        read_with_dask(self.cfg.signal_dir_path)\n\nclass TimeBenchmarkPrepGIN:\n    def setup(self):\n        \"\"\"\n        Run the cellfinder workflow setup steps.\n    \n        The command line input arguments are injected as dependencies.\n        \"\"\"\n    \n        # Run setup\n        cfg = setup_cellfinder_workflow(\n            [\n                \"--config\",\n                self.input_config_path,\n            ]\n        )\n    \n        # Save configuration as attribute\n        self.cfg = cfg\n\n    def setup_cache(\n        self,\n    ):\n        \"\"\"\n        Download the input data from the GIN repository to the local\n        directory specified in the default_config.json\n    \n        Notes\n        -----\n        The `setup_cache` method only performs the computations once\n        per benchmark round and then caches the result to disk [1]_. It cannot\n        be parametrised [2]_.\n    \n    \n        [1] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#setup-and-teardown-functions\n        [2] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#parameterized-benchmarks\n        \"\"\"\n    \n        # Check config file exists\n        assert Path(self.input_config_path).exists()\n    \n        # Instantiate a CellfinderConfig from the input json file\n        # (assumes config is json serializable)\n        with open(self.input_config_path) as cfg:\n            config_dict = json.load(cfg)\n        config = CellfinderConfig(**config_dict)\n    \n        # Download data with pooch\n        _ = pooch.retrieve(\n            url=config.data_url,\n            known_hash=config.data_hash,\n            path=config.install_path,\n            progressbar=True,\n            processor=pooch.Unzip(extract_dir=config.extract_dir_relative),\n        )\n    \n        # Check paths to input data should now exist in config\n        assert Path(config.signal_dir_path).exists()\n        assert Path(config.background_dir_path).exists()",
-        "min_run_count": 2,
-        "name": "cellfinder.TimeReadInputDask.time_read_signal_with_dask",
-        "number": 0,
-        "param_names": [],
-        "params": [],
-        "repeat": 0,
-        "rounds": 2,
-        "sample_time": 0.01,
-        "setup_cache_key": "cellfinder:84",
-        "timeout": 600,
-        "type": "time",
-        "unit": "seconds",
-        "version": "d01c253717edd52a017c710e569a36349c463b190e1ac4f0ab786c16cde75bd5",
-        "warmup_time": 0.1
-    },
-    "cellfinder.TimeSaveCells.time_save_cells": {
-        "code": "class TimeSaveCells:\n    def time_save_cells(self):\n        save_cells(self.detected_cells, self.cfg.detected_cells_path)\n\n    def setup(self):\n        # basic setup\n        TimeBenchmarkPrepGIN.setup(self)\n    \n        # add input data as arrays to config\n        self.signal_array = read_with_dask(self.cfg.signal_dir_path)\n        self.background_array = read_with_dask(self.cfg.background_dir_path)\n    \n        # detect cells\n        self.detected_cells = cellfinder_run(\n            self.signal_array, self.background_array, self.cfg.voxel_sizes\n        )\n\nclass TimeBenchmarkPrepGIN:\n    def setup_cache(\n        self,\n    ):\n        \"\"\"\n        Download the input data from the GIN repository to the local\n        directory specified in the default_config.json\n    \n        Notes\n        -----\n        The `setup_cache` method only performs the computations once\n        per benchmark round and then caches the result to disk [1]_. It cannot\n        be parametrised [2]_.\n    \n    \n        [1] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#setup-and-teardown-functions\n        [2] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#parameterized-benchmarks\n        \"\"\"\n    \n        # Check config file exists\n        assert Path(self.input_config_path).exists()\n    \n        # Instantiate a CellfinderConfig from the input json file\n        # (assumes config is json serializable)\n        with open(self.input_config_path) as cfg:\n            config_dict = json.load(cfg)\n        config = CellfinderConfig(**config_dict)\n    \n        # Download data with pooch\n        _ = pooch.retrieve(\n            url=config.data_url,\n            known_hash=config.data_hash,\n            path=config.install_path,\n            progressbar=True,\n            processor=pooch.Unzip(extract_dir=config.extract_dir_relative),\n        )\n    \n        # Check paths to input data should now exist in config\n        assert Path(config.signal_dir_path).exists()\n        assert Path(config.background_dir_path).exists()",
-        "min_run_count": 2,
-        "name": "cellfinder.TimeSaveCells.time_save_cells",
-        "number": 0,
-        "param_names": [],
-        "params": [],
-        "repeat": 0,
-        "rounds": 2,
-        "sample_time": 0.01,
-        "setup_cache_key": "cellfinder:84",
-        "timeout": 600,
-        "type": "time",
-        "unit": "seconds",
-        "version": "7c1d5133e9b37b475e715b17d7d1ffaf8094679919a0ff1b8ac5625936d8af91",
-        "warmup_time": 0.1
-    },
-    "version": 2
-}
diff --git a/brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/d83f97ba-conda-py3.10.json b/brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/d83f97ba-conda-py3.10.json
deleted file mode 100644
index f4009e68..00000000
--- a/brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/d83f97ba-conda-py3.10.json
+++ /dev/null
@@ -1 +0,0 @@
-{"commit_hash": "d83f97ba87ce4298084b16e45745cc33834b6a5e", "env_name": "conda-py3.10", "date": 1697216056000, "params": {"arch": "arm64", "cpu": "Apple M1 Pro", "machine": "eduroam-int-dhcp-97-153-159.ucl.ac.uk", "num_cpu": "10", "os": "Darwin 23.0.0", "ram": "34359738368", "python": "3.10"}, "python": "3.10", "requirements": {}, "env_vars": {}, "result_columns": ["result", "params", "version", "started_at", "duration", "stats_ci_99_a", "stats_ci_99_b", "stats_q_25", "stats_q_75", "stats_number", "stats_repeat", "samples", "profile"], "results": {"cellfinder.TimeDetectCells.time_cellfinder_run": [[13.403959749994101], [], "a37269e5549803517d14817f7e9aede2930f6ba97c3faeba7235de092a74f6c8", 1697453708463, 72.497, [-34.622], [61.43], [12.924], [13.884], [1], [2]], "cellfinder.TimeFullWorkflow.time_workflow_from_cellfinder_run": [[13.255898479488678], [], "972079517fbc189a9acc0af554fc3c8382e2add170992dfde452a1b5180a7910", 1697453742279, 68.613, [-26.059], [52.571], [12.863], [13.649], [1], [2]], "cellfinder.TimeReadInputDask.time_read_background_with_dask": [[0.0021500665956409645], [], "bf1a26a9a90c3686fe63ef495a40bda58641061ee504e8aa682554ff5f25506b", 1697453777825, 8.1392, [0.0019665], [0.0026427], [0.0020579], [0.0022618], [5], [10]], "cellfinder.TimeReadInputDask.time_read_signal_with_dask": [[0.002099774999078363], [], "d01c253717edd52a017c710e569a36349c463b190e1ac4f0ab786c16cde75bd5", 1697453781751, 7.5877, [0.0019399], [0.0026163], [0.0020621], [0.0022299], [5], [10]], "cellfinder.TimeSaveCells.time_save_cells": [[0.0007860087889160863], [], "7c1d5133e9b37b475e715b17d7d1ffaf8094679919a0ff1b8ac5625936d8af91", 1697453785631, 67.869, [-0.0078589], [0.009431], [0.00069956], [0.00087246], [19], [2]]}, "durations": {"<setup_cache cellfinder:84>": 194.625663}, "version": 2}
diff --git a/brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/machine.json b/brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/machine.json
deleted file mode 100644
index d08e58cd..00000000
--- a/brainglobe_benchmarks/results/eduroam-int-dhcp-97-153-159.ucl.ac.uk/machine.json
+++ /dev/null
@@ -1,9 +0,0 @@
-{
-    "arch": "arm64",
-    "cpu": "Apple M1 Pro",
-    "machine": "eduroam-int-dhcp-97-153-159.ucl.ac.uk",
-    "num_cpu": "10",
-    "os": "Darwin 23.0.0",
-    "ram": "34359738368",
-    "version": 1
-}

From 0956ca5e714d0b4735626832fd4711784a1694a0 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 6 Dec 2023 20:47:10 +0000
Subject: [PATCH 25/54] fix CLI test

---
 .../test_integration/brainglobe_workflows/test_cellfinder.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_integration/brainglobe_workflows/test_cellfinder.py b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
index 34e74b53..2c67a7c6 100644
--- a/tests/test_integration/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
@@ -64,7 +64,7 @@ def test_main(monkeypatch, tmp_path):
     assert (cfg.detected_cells_path).is_file()
 
 
-# TODO: test main CLI with default and custom json?
+# TODO: test main CLI with default and specific json?
 def test_app_wrapper(monkeypatch, tmp_path):
     # monkeypatch to change current directory to
     # pytest temporary directory
@@ -75,7 +75,7 @@ def test_app_wrapper(monkeypatch, tmp_path):
     subprocess_output = subprocess.run(
         [
             sys.executable,
-            Path(__file__).resolve().parents[2]
+            Path(__file__).resolve().parents[3]
             / "brainglobe_workflows"
             / "cellfinder.py",
         ],
@@ -89,6 +89,7 @@ def test_app_wrapper(monkeypatch, tmp_path):
     assert subprocess_output.returncode == 0
 
 
+# TODO: test main CLI with default and specific json?
 def test_main_entry_point(monkeypatch, tmp_path):
     #  monkeypatch to change current directory to
     # pytest temporary directory

From ea731dcc732764c28cf47bf9734fd8f91bba67cc Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 11:09:57 +0000
Subject: [PATCH 26/54] remove condition for logger one handler only

---
 tests/test_unit/brainglobe_workflows/test_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_unit/brainglobe_workflows/test_utils.py b/tests/test_unit/brainglobe_workflows/test_utils.py
index 5b7de0e8..aad3ec1e 100644
--- a/tests/test_unit/brainglobe_workflows/test_utils.py
+++ b/tests/test_unit/brainglobe_workflows/test_utils.py
@@ -9,5 +9,4 @@ def test_setup_logger(custom_logger_name):
     assert logger.level == logging.DEBUG
     assert logger.name == custom_logger_name
     assert logger.hasHandlers()
-    assert len(logger.handlers) == 1
     assert logger.handlers[0].name == "console_handler"

From 550ad45b82691e9be51adddac66281ef19493053 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 11:50:07 +0000
Subject: [PATCH 27/54] add docstrings and typing

---
 tests/conftest.py                             |   8 +-
 .../brainglobe_workflows/test_cellfinder.py   |  81 +++++++++++--
 .../brainglobe_workflows/test_cellfinder.py   | 112 +++++++++++++-----
 .../brainglobe_workflows/test_utils.py        |   9 +-
 4 files changed, 170 insertions(+), 40 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index d4251a31..11105c45 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,15 +1,19 @@
+""" Pytest fixtures shared by unit and integration tests"""
+
+from pathlib import Path
+
 import pytest
 
 
 @pytest.fixture()
-def default_input_config_cellfinder():
+def default_input_config_cellfinder() -> Path:
     from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
 
     return DEFAULT_JSON_CONFIG_PATH_CELLFINDER
 
 
 @pytest.fixture()
-def custom_logger_name():
+def custom_logger_name() -> str:
     from brainglobe_workflows.utils import __name__ as LOGGER_NAME
 
     return LOGGER_NAME
diff --git a/tests/test_integration/brainglobe_workflows/test_cellfinder.py b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
index 2c67a7c6..2bf676ec 100644
--- a/tests/test_integration/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
@@ -3,26 +3,48 @@
 import sys
 from pathlib import Path
 
+import pytest
+
+# --->why do I need setup_all??
+from pytest import MonkeyPatch
+
 from brainglobe_workflows.cellfinder import (
     CellfinderConfig,
     main,
     run_workflow_from_cellfinder_run,
 )
 from brainglobe_workflows.cellfinder import (
-    setup as setup_all,  # --->why do I need setup_all??
+    setup as setup_full,
 )
 
 
+# should this be unit test?
 def test_setup(
-    default_input_config_cellfinder, custom_logger_name, monkeypatch, tmp_path
+    default_input_config_cellfinder: Path,
+    custom_logger_name: str,
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
 ):
+    """Test full setup for cellfinder workflow
+
+    Parameters
+    ----------
+    default_input_config_cellfinder : Path
+        Default input config file
+    custom_logger_name : str
+        Name of custom logger
+    monkeypatch : MonkeyPatch
+        Pytest fixture to use monkeypatching utils
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    """
     # monkeypatch to change current directory to
     # pytest temporary directory
     # (cellfinder cache directory is created in cwd)
     monkeypatch.chdir(tmp_path)
 
     # run setup on default configuration
-    cfg = setup_all(default_input_config_cellfinder)
+    cfg = setup_full(str(default_input_config_cellfinder))
 
     # check logger exists
     logger = logging.getLogger(custom_logger_name)
@@ -33,26 +55,49 @@ def test_setup(
     assert isinstance(cfg, CellfinderConfig)
 
 
+# should this be unit test?
 def test_run_workflow_from_cellfinder_run(
-    default_input_config_cellfinder, monkeypatch, tmp_path
+    default_input_config_cellfinder: Path,
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
 ):
+    """Test running cellfinder workflow
+
+    Parameters
+    ----------
+    default_input_config_cellfinder : Path
+        Default input config file
+    monkeypatch : MonkeyPatch
+        Pytest fixture to use monkeypatching utils
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    """
     # monkeypatch to change current directory to
     # pytest temporary directory
     # (cellfinder cache directory is created in cwd)
     monkeypatch.chdir(tmp_path)
 
     # run setup --- have as a fixture for these two test instead?
-    cfg = setup_all(default_input_config_cellfinder)
+    cfg = setup_full(str(default_input_config_cellfinder))
 
     # run workflow
     run_workflow_from_cellfinder_run(cfg)
 
     # check output files are those expected?
-    assert (cfg.detected_cells_path).is_file()
+    assert Path(cfg.detected_cells_path).is_file()
 
 
 # TODO: test main with default and custom json?
-def test_main(monkeypatch, tmp_path):
+def test_main(monkeypatch: MonkeyPatch, tmp_path: Path):
+    """Test main function for setting up and running cellfinder workflow
+
+    Parameters
+    ----------
+    monkeypatch : MonkeyPatch
+        Pytest fixture to use monkeypatching utils
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    """
     # monkeypatch to change current directory to
     # pytest temporary directory
     # (cellfinder cache directory is created in cwd)
@@ -65,7 +110,16 @@ def test_main(monkeypatch, tmp_path):
 
 
 # TODO: test main CLI with default and specific json?
-def test_app_wrapper(monkeypatch, tmp_path):
+def test_app_wrapper(monkeypatch: MonkeyPatch, tmp_path: Path):
+    """Test running the cellfinder worklfow from the command line
+
+    Parameters
+    ----------
+    monkeypatch : MonkeyPatch
+        Pytest fixture to use monkeypatching utils
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    """
     # monkeypatch to change current directory to
     # pytest temporary directory
     # (cellfinder cache directory is created in cwd)
@@ -90,7 +144,16 @@ def test_app_wrapper(monkeypatch, tmp_path):
 
 
 # TODO: test main CLI with default and specific json?
-def test_main_entry_point(monkeypatch, tmp_path):
+def test_main_entry_point(monkeypatch: MonkeyPatch, tmp_path: Path):
+    """Test running the cellfinder workflow via the predefined entry point
+
+    Parameters
+    ----------
+    monkeypatch : MonkeyPatch
+        Pytest fixture to use monkeypatching utils
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    """
     #  monkeypatch to change current directory to
     # pytest temporary directory
     # (cellfinder cache directory is created in cwd)
diff --git a/tests/test_unit/brainglobe_workflows/test_cellfinder.py b/tests/test_unit/brainglobe_workflows/test_cellfinder.py
index 698447bb..680c51ea 100644
--- a/tests/test_unit/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_unit/brainglobe_workflows/test_cellfinder.py
@@ -14,7 +14,14 @@
 
 
 @pytest.fixture()
-def input_configs_dir():
+def input_configs_dir() -> Path:
+    """Return the test data directory path
+
+    Returns
+    -------
+    Path
+        Test data directory path
+    """
     return Path(__file__).parents[2] / "data"
 
 
@@ -34,7 +41,20 @@ def cellfinder_GIN_data() -> dict:
 
 
 @pytest.fixture()
-def input_config_fetch_GIN(input_configs_dir):
+def input_config_fetch_GIN(input_configs_dir: Path) -> Path:
+    """
+    Return the config json file for fetching data from GIN
+
+    Parameters
+    ----------
+    input_configs_dir : Path
+        Path to the directory holding the test config files.
+
+    Returns
+    -------
+    Path
+        Path to the config json file for fetching data from GIN
+    """
     return input_configs_dir / "input_data_GIN.json"
 
 
@@ -48,7 +68,16 @@ def input_config_fetch_GIN(input_configs_dir):
         "input_data_not_locally_or_GIN.json",
     ],
 )
-def test_read_cellfinder_config(input_config, input_configs_dir):
+def test_read_cellfinder_config(input_config: str, input_configs_dir: Path):
+    """Test for reading a cellfinder config file
+
+    Parameters
+    ----------
+    input_config : str
+        Name of input config json file
+    input_configs_dir : Path
+        Test data directory path
+    """
     # path to config json file
     input_config_path = input_configs_dir / input_config
 
@@ -89,27 +118,29 @@ def test_read_cellfinder_config(input_config, input_configs_dir):
     ],
 )
 def test_add_signal_and_background_files(
-    caplog,
-    tmp_path,
-    cellfinder_GIN_data,
-    input_configs_dir,
-    input_config,
-    message_pattern,
+    caplog: pytest.LogCaptureFixture,
+    tmp_path: Path,
+    cellfinder_GIN_data: dict,
+    input_configs_dir: Path,
+    input_config: str,
+    message_pattern: str,
 ):
-    """_summary_
+    """Test signal and background files addition to the cellfinder config
 
     Parameters
     ----------
-    caplog : _type_
-        _description_
+    caplog : pytest.LogCaptureFixture
+        Pytest fixture to capture the logs during testing
     tmp_path : Path
-        path to pytest-generated temporary directory
-    input_configs_dir : _type_
-        _description_
-    input_config : _type_
-        _description_
-    message : _type_
-        _description_
+        Pytest fixture providing a temporary path for each test
+    cellfinder_GIN_data : dict
+        Dict holding the URL and hash of the cellfinder test data in GIN
+    input_configs_dir : Path
+        Test data directory path
+    input_config : str
+        Name of input config json file
+    message_pattern : str
+        Expected pattern in the log
     """
     # instantiate our custom logger
     _ = setup_logger()
@@ -162,6 +193,7 @@ def test_add_signal_and_background_files(
     # check log messages
     assert len(caplog.messages) > 0
     out = re.fullmatch(message_pattern, caplog.messages[-1])
+    assert out is not None
     assert out.group() is not None
 
 
@@ -173,15 +205,36 @@ def test_add_signal_and_background_files(
     ],
 )
 def test_setup_workflow(
-    input_config, message, monkeypatch, tmp_path, caplog, request
+    input_config: str,
+    message: str,
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    caplog: pytest.LogCaptureFixture,
+    request: pytest.FixtureRequest,
 ):
-    """Test setup steps for the cellfinder workflow are completed
+    """Test setup steps for the cellfinder workflow
 
     These setup steps include:
-    - instantiating a CellfinderConfig object with the required parameters,
-    - add signal and background files to config if these do not exist
-    - creating a timestamped directory for the output of the workflow if
-    it doesn't exist and adding its path to the config
+    - instantiating a CellfinderConfig object using the input json file,
+    - add the signal and background files to the config if these are not
+      defined,
+    - create a timestamped directory for the output of the workflow if
+      it doesn't exist and add its path to the config
+
+    Parameters
+    ----------
+    input_config : str
+        Name of input config json file
+    message : str
+        Expected log message
+    monkeypatch : pytest.MonkeyPatch
+        Pytest fixture to use monkeypatching utils
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    caplog : pytest.LogCaptureFixture
+        Pytest fixture to capture the logs during testing
+    request : pytest.FixtureRequest
+        Pytest fixture to enable requesting fixtures by name
     """
 
     # setup logger
@@ -199,9 +252,11 @@ def test_setup_workflow(
     assert message in caplog.text
 
     # check all signal files exist
+    assert config.list_signal_files
     assert all([Path(f).is_file() for f in config.list_signal_files])
 
     # check all background files exist
+    assert config.list_background_files
     assert all([Path(f).is_file() for f in config.list_background_files])
 
     # check output directory exists
@@ -209,13 +264,14 @@ def test_setup_workflow(
 
     # check output directory name has correct format
     out = re.fullmatch(
-        config.output_path_basename_relative + "\\d{8}_\\d{6}$",
+        str(config.output_path_basename_relative) + "\\d{8}_\\d{6}$",
         Path(config.output_path).stem,
     )
+    assert out is not None
     assert out.group() is not None
 
     # check output file path
     assert (
-        config.detected_cells_path
-        == config.output_path / config.detected_cells_filename
+        Path(config.detected_cells_path)
+        == Path(config.output_path) / config.detected_cells_filename
     )
diff --git a/tests/test_unit/brainglobe_workflows/test_utils.py b/tests/test_unit/brainglobe_workflows/test_utils.py
index aad3ec1e..12cbfa7b 100644
--- a/tests/test_unit/brainglobe_workflows/test_utils.py
+++ b/tests/test_unit/brainglobe_workflows/test_utils.py
@@ -3,7 +3,14 @@
 from brainglobe_workflows.utils import setup_logger
 
 
-def test_setup_logger(custom_logger_name):
+def test_setup_logger(custom_logger_name: str):
+    """Test custom logger is correctly created
+
+    Parameters
+    ----------
+    custom_logger_name : str
+        Pytest fixture for the custom logger name
+    """
     logger = setup_logger()
 
     assert logger.level == logging.DEBUG

From 59a31ca02d5608678a8112f48e5dfb67b0dad76d Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 13:55:36 +0000
Subject: [PATCH 28/54] remove conftest and move some integration tests to unit

---
 tests/conftest.py                             |  19 --
 .../brainglobe_workflows/test_cellfinder.py   |  99 +---------
 .../brainglobe_workflows/test_cellfinder.py   | 176 +++++++++++++++++-
 3 files changed, 183 insertions(+), 111 deletions(-)
 delete mode 100644 tests/conftest.py

diff --git a/tests/conftest.py b/tests/conftest.py
deleted file mode 100644
index 11105c45..00000000
--- a/tests/conftest.py
+++ /dev/null
@@ -1,19 +0,0 @@
-""" Pytest fixtures shared by unit and integration tests"""
-
-from pathlib import Path
-
-import pytest
-
-
-@pytest.fixture()
-def default_input_config_cellfinder() -> Path:
-    from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
-
-    return DEFAULT_JSON_CONFIG_PATH_CELLFINDER
-
-
-@pytest.fixture()
-def custom_logger_name() -> str:
-    from brainglobe_workflows.utils import __name__ as LOGGER_NAME
-
-    return LOGGER_NAME
diff --git a/tests/test_integration/brainglobe_workflows/test_cellfinder.py b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
index 2bf676ec..55e8b03d 100644
--- a/tests/test_integration/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
@@ -1,99 +1,19 @@
-import logging
 import subprocess
 import sys
 from pathlib import Path
 
 import pytest
 
-# --->why do I need setup_all??
-from pytest import MonkeyPatch
+from brainglobe_workflows.cellfinder import main
 
-from brainglobe_workflows.cellfinder import (
-    CellfinderConfig,
-    main,
-    run_workflow_from_cellfinder_run,
-)
-from brainglobe_workflows.cellfinder import (
-    setup as setup_full,
-)
 
-
-# should this be unit test?
-def test_setup(
-    default_input_config_cellfinder: Path,
-    custom_logger_name: str,
-    monkeypatch: pytest.MonkeyPatch,
-    tmp_path: Path,
-):
-    """Test full setup for cellfinder workflow
-
-    Parameters
-    ----------
-    default_input_config_cellfinder : Path
-        Default input config file
-    custom_logger_name : str
-        Name of custom logger
-    monkeypatch : MonkeyPatch
-        Pytest fixture to use monkeypatching utils
-    tmp_path : Path
-        Pytest fixture providing a temporary path for each test
-    """
-    # monkeypatch to change current directory to
-    # pytest temporary directory
-    # (cellfinder cache directory is created in cwd)
-    monkeypatch.chdir(tmp_path)
-
-    # run setup on default configuration
-    cfg = setup_full(str(default_input_config_cellfinder))
-
-    # check logger exists
-    logger = logging.getLogger(custom_logger_name)
-    assert logger.level == logging.DEBUG
-    assert logger.hasHandlers()
-
-    # check config is CellfinderConfig
-    assert isinstance(cfg, CellfinderConfig)
-
-
-# should this be unit test?
-def test_run_workflow_from_cellfinder_run(
-    default_input_config_cellfinder: Path,
-    monkeypatch: pytest.MonkeyPatch,
-    tmp_path: Path,
-):
-    """Test running cellfinder workflow
-
-    Parameters
-    ----------
-    default_input_config_cellfinder : Path
-        Default input config file
-    monkeypatch : MonkeyPatch
-        Pytest fixture to use monkeypatching utils
-    tmp_path : Path
-        Pytest fixture providing a temporary path for each test
-    """
-    # monkeypatch to change current directory to
-    # pytest temporary directory
-    # (cellfinder cache directory is created in cwd)
-    monkeypatch.chdir(tmp_path)
-
-    # run setup --- have as a fixture for these two test instead?
-    cfg = setup_full(str(default_input_config_cellfinder))
-
-    # run workflow
-    run_workflow_from_cellfinder_run(cfg)
-
-    # check output files are those expected?
-    assert Path(cfg.detected_cells_path).is_file()
-
-
-# TODO: test main with default and custom json?
-def test_main(monkeypatch: MonkeyPatch, tmp_path: Path):
+# TODO: test main with default and custom json (local and GIN)?
+def test_main(monkeypatch: pytest.MonkeyPatch, tmp_path: Path):
     """Test main function for setting up and running cellfinder workflow
 
     Parameters
     ----------
-    monkeypatch : MonkeyPatch
+    monkeypatch : pytest.MonkeyPatch
         Pytest fixture to use monkeypatching utils
     tmp_path : Path
         Pytest fixture providing a temporary path for each test
@@ -103,6 +23,7 @@ def test_main(monkeypatch: MonkeyPatch, tmp_path: Path):
     # (cellfinder cache directory is created in cwd)
     monkeypatch.chdir(tmp_path)
 
+    # run main
     cfg = main()
 
     # check output files are those expected?
@@ -110,12 +31,12 @@ def test_main(monkeypatch: MonkeyPatch, tmp_path: Path):
 
 
 # TODO: test main CLI with default and specific json?
-def test_app_wrapper(monkeypatch: MonkeyPatch, tmp_path: Path):
+def test_app_wrapper(monkeypatch: pytest.MonkeyPatch, tmp_path: Path):
     """Test running the cellfinder worklfow from the command line
 
     Parameters
     ----------
-    monkeypatch : MonkeyPatch
+    monkeypatch : pytest.MonkeyPatch
         Pytest fixture to use monkeypatching utils
     tmp_path : Path
         Pytest fixture providing a temporary path for each test
@@ -125,7 +46,7 @@ def test_app_wrapper(monkeypatch: MonkeyPatch, tmp_path: Path):
     # (cellfinder cache directory is created in cwd)
     monkeypatch.chdir(tmp_path)
 
-    # run workflow with no CLI arguments,
+    # run workflow script with no CLI arguments,
     subprocess_output = subprocess.run(
         [
             sys.executable,
@@ -144,12 +65,12 @@ def test_app_wrapper(monkeypatch: MonkeyPatch, tmp_path: Path):
 
 
 # TODO: test main CLI with default and specific json?
-def test_main_entry_point(monkeypatch: MonkeyPatch, tmp_path: Path):
+def test_main_entry_point(monkeypatch: pytest.MonkeyPatch, tmp_path: Path):
     """Test running the cellfinder workflow via the predefined entry point
 
     Parameters
     ----------
-    monkeypatch : MonkeyPatch
+    monkeypatch : pytest.MonkeyPatch
         Pytest fixture to use monkeypatching utils
     tmp_path : Path
         Pytest fixture providing a temporary path for each test
diff --git a/tests/test_unit/brainglobe_workflows/test_cellfinder.py b/tests/test_unit/brainglobe_workflows/test_cellfinder.py
index 680c51ea..b6044eb0 100644
--- a/tests/test_unit/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_unit/brainglobe_workflows/test_cellfinder.py
@@ -1,4 +1,5 @@
 import json
+import logging
 import re
 from pathlib import Path
 
@@ -6,16 +7,20 @@
 import pytest
 
 from brainglobe_workflows.cellfinder import (
+    CellfinderConfig,
     add_signal_and_background_files,
     read_cellfinder_config,
+    run_workflow_from_cellfinder_run,
     setup_workflow,
 )
+from brainglobe_workflows.cellfinder import setup as setup_full
 from brainglobe_workflows.utils import setup_logger
 
 
 @pytest.fixture()
 def input_configs_dir() -> Path:
-    """Return the test data directory path
+    """Return the directory path to the input configs
+    used for testing
 
     Returns
     -------
@@ -40,7 +45,7 @@ def cellfinder_GIN_data() -> dict:
     }
 
 
-@pytest.fixture()
+@pytest.fixture()  # ---Do I need this???
 def input_config_fetch_GIN(input_configs_dir: Path) -> Path:
     """
     Return the config json file for fetching data from GIN
@@ -58,6 +63,77 @@ def input_config_fetch_GIN(input_configs_dir: Path) -> Path:
     return input_configs_dir / "input_data_GIN.json"
 
 
+@pytest.fixture()  # ---Do I need this???
+def input_config_fetch_local(
+    input_configs_dir: Path,
+    cellfinder_GIN_data: dict,
+) -> Path:
+    """
+    Download the cellfinder data locally and return the config json
+    file for fetching data locally.
+
+    The data is downloaded to a directory relative to cwd
+
+    Parameters
+    ----------
+    input_configs_dir : Path
+        Path to the directory holding the test config files.
+    cellfinder_GIN_data : dict
+        URL and hash of the GIN repository with the cellfinder test data
+
+    Returns
+    -------
+    Path
+        Path to the config json file for fetching data locally
+    """
+    # read local config
+    input_config_path = input_configs_dir / "input_data_locally.json"
+    config = read_cellfinder_config(input_config_path)
+
+    # fetch data from GIN and download locally
+    pooch.retrieve(
+        url=cellfinder_GIN_data["url"],
+        known_hash=cellfinder_GIN_data["hash"],
+        path=config.install_path,  # path to download zip to
+        progressbar=True,
+        processor=pooch.Unzip(
+            extract_dir=config.data_dir_relative
+            # path to unzipped dir, *relative*  to 'path'
+        ),
+    )
+
+    return input_config_path
+
+
+@pytest.fixture()
+def default_input_config_cellfinder() -> Path:
+    """Return path to default input config for cellfinder workflow
+
+    Returns
+    -------
+    Path
+        Path to default input config
+
+    """
+    from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER
+
+    return DEFAULT_JSON_CONFIG_PATH_CELLFINDER
+
+
+@pytest.fixture()
+def custom_logger_name() -> str:
+    """Return name of custom logger created in workflow utils
+
+    Returns
+    -------
+    str
+        Name of custom logger
+    """
+    from brainglobe_workflows.utils import __name__ as LOGGER_NAME
+
+    return LOGGER_NAME
+
+
 @pytest.mark.parametrize(
     "input_config",
     [
@@ -212,7 +288,8 @@ def test_setup_workflow(
     caplog: pytest.LogCaptureFixture,
     request: pytest.FixtureRequest,
 ):
-    """Test setup steps for the cellfinder workflow
+    """Test setup steps for the cellfinder workflow, using the default config
+    and passing a specific config file.
 
     These setup steps include:
     - instantiating a CellfinderConfig object using the input json file,
@@ -275,3 +352,96 @@ def test_setup_workflow(
         Path(config.detected_cells_path)
         == Path(config.output_path) / config.detected_cells_filename
     )
+
+
+@pytest.mark.parametrize(
+    "input_config",
+    [
+        "default_input_config_cellfinder",
+        "input_config_fetch_GIN",
+        "input_config_fetch_local",
+    ],
+)
+def test_setup(
+    input_config: Path,
+    custom_logger_name: str,
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    request: pytest.FixtureRequest,
+):
+    """Test full setup for cellfinder workflow, using the default config
+    and passing a specific config file.
+
+    Parameters
+    ----------
+    input_config : Path
+        Path to input config file
+    custom_logger_name : str
+        Name of custom logger
+    monkeypatch : MonkeyPatch
+        Pytest fixture to use monkeypatching utils
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    request : pytest.FixtureRequest
+        Pytest fixture to enable requesting fixtures by name
+    """
+    # Monkeypatch to change current directory to
+    # pytest temporary directory
+    # (cellfinder cache directory is created in cwd)
+    # ------ does this work alright for local?
+    monkeypatch.chdir(tmp_path)
+
+    # run setup on default configuration
+    cfg = setup_full(request.getfixturevalue(input_config))
+
+    # check logger exists
+    logger = logging.getLogger(custom_logger_name)
+    assert logger.level == logging.DEBUG
+    assert logger.hasHandlers()
+
+    # check config is CellfinderConfig
+    assert isinstance(cfg, CellfinderConfig)
+
+
+@pytest.mark.parametrize(
+    "input_config",
+    [
+        "default_input_config_cellfinder",
+        "input_config_fetch_GIN",
+        "input_config_fetch_local",
+    ],
+)
+def test_run_workflow_from_cellfinder_run(  # --------------------
+    input_config: str,
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    request: pytest.FixtureRequest,
+):
+    """Test running cellfinder workflow with default input config
+    (fetches data from GIN) and local input config
+
+    Parameters
+    ----------
+    input_config : str
+        Path to input config json file
+    monkeypatch : MonkeyPatch
+        Pytest fixture to use monkeypatching utils
+    tmp_path : Path
+        Pytest fixture providing a temporary path for each test
+    request : pytest.FixtureRequest
+        Pytest fixture to enable requesting fixtures by name
+    """
+    # monkeypatch to change current directory to
+    # pytest temporary directory
+    # (cellfinder cache directory is created in cwd)
+    # ------ does this work alright for local?
+    monkeypatch.chdir(tmp_path)
+
+    # run setup
+    cfg = setup_full(str(request.getfixturevalue(input_config)))
+
+    # run workflow
+    run_workflow_from_cellfinder_run(cfg)
+
+    # check output files are those expected?
+    assert Path(cfg.detected_cells_path).is_file()

From 233c3df6eadaa7f5fed34e9a9c90fd611e4f130b Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 15:51:27 +0000
Subject: [PATCH 29/54] recover conftest for shared fixtures

---
 tests/conftest.py | 96 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 tests/conftest.py

diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..6550b7a8
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,96 @@
+"""Pytest fixtures shared across unit and integration tests"""
+
+from pathlib import Path
+
+import pooch
+import pytest
+
+from brainglobe_workflows.cellfinder import read_cellfinder_config
+
+
+@pytest.fixture()
+def input_configs_dir() -> Path:
+    """Return the directory path to the input configs
+    used for testing
+
+    Returns
+    -------
+    Path
+        Test data directory path
+    """
+    return Path(__file__).parent / "data"
+
+
+@pytest.fixture()
+def input_config_fetch_GIN(input_configs_dir: Path) -> Path:
+    """
+    Return the config json file for fetching data from GIN
+
+    Parameters
+    ----------
+    input_configs_dir : Path
+        Path to the directory holding the test config files.
+
+    Returns
+    -------
+    Path
+        Path to the config json file for fetching data from GIN
+    """
+    return input_configs_dir / "input_data_GIN.json"
+
+
+@pytest.fixture(scope="session")
+def cellfinder_GIN_data() -> dict:
+    """Return the URL and hash to the GIN repository with the input data
+
+    Returns
+    -------
+    dict
+        URL and hash of the GIN repository with the cellfinder test data
+    """
+    return {
+        "url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
+        "hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",  # noqa
+    }
+
+
+@pytest.fixture()
+def input_config_fetch_local(
+    input_configs_dir: Path,
+    cellfinder_GIN_data: dict,
+) -> Path:
+    """
+    Download the cellfinder data locally and return the config json
+    file for fetching data locally.
+
+    The data is downloaded to a directory relative to cwd
+
+    Parameters
+    ----------
+    input_configs_dir : Path
+        Path to the directory holding the test config files.
+    cellfinder_GIN_data : dict
+        URL and hash of the GIN repository with the cellfinder test data
+
+    Returns
+    -------
+    Path
+        Path to the config json file for fetching data locally
+    """
+    # read local config
+    input_config_path = input_configs_dir / "input_data_locally.json"
+    config = read_cellfinder_config(input_config_path)
+
+    # fetch data from GIN and download locally
+    pooch.retrieve(
+        url=cellfinder_GIN_data["url"],
+        known_hash=cellfinder_GIN_data["hash"],
+        path=config.install_path,  # path to download zip to
+        progressbar=True,
+        processor=pooch.Unzip(
+            extract_dir=config.data_dir_relative
+            # path to unzipped dir, *relative*  to 'path'
+        ),
+    )
+
+    return input_config_path

From 8dd13744966bbb03362fdfefec01234118411736 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 15:54:51 +0000
Subject: [PATCH 30/54] parametrise integration tests

---
 .../brainglobe_workflows/test_cellfinder.py   | 119 ++++++++++++++----
 1 file changed, 94 insertions(+), 25 deletions(-)

diff --git a/tests/test_integration/brainglobe_workflows/test_cellfinder.py b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
index 55e8b03d..a61f2316 100644
--- a/tests/test_integration/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
@@ -1,22 +1,39 @@
 import subprocess
 import sys
 from pathlib import Path
+from typing import Optional
 
 import pytest
 
 from brainglobe_workflows.cellfinder import main
 
 
-# TODO: test main with default and custom json (local and GIN)?
-def test_main(monkeypatch: pytest.MonkeyPatch, tmp_path: Path):
+@pytest.mark.parametrize(
+    "input_config",
+    [
+        None,
+        "input_config_fetch_GIN",
+        "input_config_fetch_local",
+    ],
+)
+def test_main(
+    input_config: Optional[str],
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    request: pytest.FixtureRequest,
+):
     """Test main function for setting up and running cellfinder workflow
 
     Parameters
     ----------
+    input_config : Optional[str]
+        Path to input config json file
     monkeypatch : pytest.MonkeyPatch
         Pytest fixture to use monkeypatching utils
     tmp_path : Path
         Pytest fixture providing a temporary path for each test
+    request : pytest.FixtureRequest
+        Pytest fixture to enable requesting fixtures by name
     """
     # monkeypatch to change current directory to
     # pytest temporary directory
@@ -24,69 +41,121 @@ def test_main(monkeypatch: pytest.MonkeyPatch, tmp_path: Path):
     monkeypatch.chdir(tmp_path)
 
     # run main
-    cfg = main()
+    if not input_config:
+        cfg = main()  # use default config
+    else:
+        cfg = main(str(request.getfixturevalue(input_config)))
 
-    # check output files are those expected?
+    # check output files exist
     assert (cfg.detected_cells_path).is_file()
 
 
-# TODO: test main CLI with default and specific json?
-def test_app_wrapper(monkeypatch: pytest.MonkeyPatch, tmp_path: Path):
+@pytest.mark.parametrize(
+    "input_config",
+    [
+        None,
+        "input_config_fetch_GIN",
+        "input_config_fetch_local",
+    ],
+)
+def test_app_wrapper(
+    input_config: Optional[str],
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    request: pytest.FixtureRequest,
+):
     """Test running the cellfinder worklfow from the command line
 
     Parameters
     ----------
+    input_config : Optional[str]
+        Path to input config json file
     monkeypatch : pytest.MonkeyPatch
         Pytest fixture to use monkeypatching utils
     tmp_path : Path
         Pytest fixture providing a temporary path for each test
+    request : pytest.FixtureRequest
+        Pytest fixture to enable requesting fixtures by name
     """
     # monkeypatch to change current directory to
     # pytest temporary directory
     # (cellfinder cache directory is created in cwd)
     monkeypatch.chdir(tmp_path)
 
-    # run workflow script with no CLI arguments,
+    # define CLI input
+    script_path = (
+        Path(__file__).resolve().parents[3]
+        / "brainglobe_workflows"
+        / "cellfinder.py"
+    )
+    subprocess_input = [
+        sys.executable,
+        str(script_path),
+    ]
+    # append config if required
+    if input_config:
+        subprocess_input.append("--config")
+        subprocess_input.append(str(request.getfixturevalue(input_config)))
+
+    # run workflow script from the CLI
     subprocess_output = subprocess.run(
-        [
-            sys.executable,
-            Path(__file__).resolve().parents[3]
-            / "brainglobe_workflows"
-            / "cellfinder.py",
-        ],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        text=True,
-        encoding="utf-8",
+        subprocess_input,
+        # stdout=subprocess.PIPE,
+        # stderr=subprocess.STDOUT,
+        # text=True,
+        # encoding="utf-8",
     )
 
     # check returncode
     assert subprocess_output.returncode == 0
 
 
-# TODO: test main CLI with default and specific json?
-def test_main_entry_point(monkeypatch: pytest.MonkeyPatch, tmp_path: Path):
+@pytest.mark.parametrize(
+    "input_config",
+    [
+        None,
+        "input_config_fetch_GIN",
+        "input_config_fetch_local",
+    ],
+)
+def test_main_entry_point(
+    input_config: Optional[str],
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    request: pytest.FixtureRequest,
+):
     """Test running the cellfinder workflow via the predefined entry point
 
     Parameters
     ----------
+    input_config : Optional[str]
+        Path to input config json file
     monkeypatch : pytest.MonkeyPatch
         Pytest fixture to use monkeypatching utils
     tmp_path : Path
         Pytest fixture providing a temporary path for each test
+    request : pytest.FixtureRequest
+        Pytest fixture to enable requesting fixtures by name
     """
-    #  monkeypatch to change current directory to
+    # monkeypatch to change current directory to
     # pytest temporary directory
     # (cellfinder cache directory is created in cwd)
     monkeypatch.chdir(tmp_path)
 
+    # define CLI input
+    subprocess_input = ["cellfinder-workflow"]
+    # append config if required
+    if input_config:
+        subprocess_input.append("--config")
+        subprocess_input.append(str(request.getfixturevalue(input_config)))
+
     # run workflow with no CLI arguments,
     subprocess_output = subprocess.run(
-        ["cellfinder-workflow"],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        text=True,
-        encoding="utf-8",
+        subprocess_input,
+        # stdout=subprocess.PIPE,
+        # stderr=subprocess.STDOUT,
+        # text=True,
+        # encoding="utf-8",
     )
 
     # check returncode

From 91a39b5b08be631bd239a247489edcea5c288b0e Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 15:55:37 +0000
Subject: [PATCH 31/54] move shared fixtures from unit to conftest.py

---
 .../brainglobe_workflows/test_cellfinder.py   | 96 +------------------
 1 file changed, 3 insertions(+), 93 deletions(-)

diff --git a/tests/test_unit/brainglobe_workflows/test_cellfinder.py b/tests/test_unit/brainglobe_workflows/test_cellfinder.py
index b6044eb0..4b0ff1f9 100644
--- a/tests/test_unit/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_unit/brainglobe_workflows/test_cellfinder.py
@@ -17,94 +17,6 @@
 from brainglobe_workflows.utils import setup_logger
 
 
-@pytest.fixture()
-def input_configs_dir() -> Path:
-    """Return the directory path to the input configs
-    used for testing
-
-    Returns
-    -------
-    Path
-        Test data directory path
-    """
-    return Path(__file__).parents[2] / "data"
-
-
-@pytest.fixture(scope="session")
-def cellfinder_GIN_data() -> dict:
-    """Return the URL and hash to the GIN repository with the input data
-
-    Returns
-    -------
-    dict
-        URL and hash of the GIN repository with the cellfinder test data
-    """
-    return {
-        "url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
-        "hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",  # noqa
-    }
-
-
-@pytest.fixture()  # ---Do I need this???
-def input_config_fetch_GIN(input_configs_dir: Path) -> Path:
-    """
-    Return the config json file for fetching data from GIN
-
-    Parameters
-    ----------
-    input_configs_dir : Path
-        Path to the directory holding the test config files.
-
-    Returns
-    -------
-    Path
-        Path to the config json file for fetching data from GIN
-    """
-    return input_configs_dir / "input_data_GIN.json"
-
-
-@pytest.fixture()  # ---Do I need this???
-def input_config_fetch_local(
-    input_configs_dir: Path,
-    cellfinder_GIN_data: dict,
-) -> Path:
-    """
-    Download the cellfinder data locally and return the config json
-    file for fetching data locally.
-
-    The data is downloaded to a directory relative to cwd
-
-    Parameters
-    ----------
-    input_configs_dir : Path
-        Path to the directory holding the test config files.
-    cellfinder_GIN_data : dict
-        URL and hash of the GIN repository with the cellfinder test data
-
-    Returns
-    -------
-    Path
-        Path to the config json file for fetching data locally
-    """
-    # read local config
-    input_config_path = input_configs_dir / "input_data_locally.json"
-    config = read_cellfinder_config(input_config_path)
-
-    # fetch data from GIN and download locally
-    pooch.retrieve(
-        url=cellfinder_GIN_data["url"],
-        known_hash=cellfinder_GIN_data["hash"],
-        path=config.install_path,  # path to download zip to
-        progressbar=True,
-        processor=pooch.Unzip(
-            extract_dir=config.data_dir_relative
-            # path to unzipped dir, *relative*  to 'path'
-        ),
-    )
-
-    return input_config_path
-
-
 @pytest.fixture()
 def default_input_config_cellfinder() -> Path:
     """Return path to default input config for cellfinder workflow
@@ -363,7 +275,7 @@ def test_setup_workflow(
     ],
 )
 def test_setup(
-    input_config: Path,
+    input_config: str,
     custom_logger_name: str,
     monkeypatch: pytest.MonkeyPatch,
     tmp_path: Path,
@@ -374,7 +286,7 @@ def test_setup(
 
     Parameters
     ----------
-    input_config : Path
+    input_config : str
         Path to input config file
     custom_logger_name : str
         Name of custom logger
@@ -388,7 +300,6 @@ def test_setup(
     # Monkeypatch to change current directory to
     # pytest temporary directory
     # (cellfinder cache directory is created in cwd)
-    # ------ does this work alright for local?
     monkeypatch.chdir(tmp_path)
 
     # run setup on default configuration
@@ -411,7 +322,7 @@ def test_setup(
         "input_config_fetch_local",
     ],
 )
-def test_run_workflow_from_cellfinder_run(  # --------------------
+def test_run_workflow_from_cellfinder_run(
     input_config: str,
     monkeypatch: pytest.MonkeyPatch,
     tmp_path: Path,
@@ -434,7 +345,6 @@ def test_run_workflow_from_cellfinder_run(  # --------------------
     # monkeypatch to change current directory to
     # pytest temporary directory
     # (cellfinder cache directory is created in cwd)
-    # ------ does this work alright for local?
     monkeypatch.chdir(tmp_path)
 
     # run setup

From 8435f326871d65da20ab2cc9a832dcf795e3d7b0 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 15:56:15 +0000
Subject: [PATCH 32/54] change argument for CLI via main using typer

---
 brainglobe_workflows/cellfinder.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/brainglobe_workflows/cellfinder.py b/brainglobe_workflows/cellfinder.py
index 59677f2b..1e6db94b 100644
--- a/brainglobe_workflows/cellfinder.py
+++ b/brainglobe_workflows/cellfinder.py
@@ -356,9 +356,9 @@ def run_workflow_from_cellfinder_run(cfg: CellfinderConfig):
     )
 
 
-def main(input_config_path=DEFAULT_JSON_CONFIG_PATH_CELLFINDER):
+def main(config=DEFAULT_JSON_CONFIG_PATH_CELLFINDER):
     # run setup
-    cfg = setup(input_config_path)
+    cfg = setup(config)
 
     # run workflow
     run_workflow_from_cellfinder_run(cfg)  # only this will be benchmarked

From 4395380c4469a7deee42d1aec19f33284d04f0f5 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 16:18:13 +0000
Subject: [PATCH 33/54] move custom logger fixture to conftest under unit tests

---
 tests/test_unit/brainglobe_workflows/conftest.py  | 15 +++++++++++++++
 .../brainglobe_workflows/test_cellfinder.py       | 14 --------------
 2 files changed, 15 insertions(+), 14 deletions(-)
 create mode 100644 tests/test_unit/brainglobe_workflows/conftest.py

diff --git a/tests/test_unit/brainglobe_workflows/conftest.py b/tests/test_unit/brainglobe_workflows/conftest.py
new file mode 100644
index 00000000..06359c1e
--- /dev/null
+++ b/tests/test_unit/brainglobe_workflows/conftest.py
@@ -0,0 +1,15 @@
+import pytest
+
+
+@pytest.fixture()
+def custom_logger_name() -> str:
+    """Return name of custom logger created in workflow utils
+
+    Returns
+    -------
+    str
+        Name of custom logger
+    """
+    from brainglobe_workflows.utils import __name__ as LOGGER_NAME
+
+    return LOGGER_NAME
diff --git a/tests/test_unit/brainglobe_workflows/test_cellfinder.py b/tests/test_unit/brainglobe_workflows/test_cellfinder.py
index 4b0ff1f9..ddb4c706 100644
--- a/tests/test_unit/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_unit/brainglobe_workflows/test_cellfinder.py
@@ -32,20 +32,6 @@ def default_input_config_cellfinder() -> Path:
     return DEFAULT_JSON_CONFIG_PATH_CELLFINDER
 
 
-@pytest.fixture()
-def custom_logger_name() -> str:
-    """Return name of custom logger created in workflow utils
-
-    Returns
-    -------
-    str
-        Name of custom logger
-    """
-    from brainglobe_workflows.utils import __name__ as LOGGER_NAME
-
-    return LOGGER_NAME
-
-
 @pytest.mark.parametrize(
     "input_config",
     [

From 6e0801b5277ecc3bda3887c0dcd66ecc3df5b3cc Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 16:21:22 +0000
Subject: [PATCH 34/54] fix for brainglobe workflows benchmark test

---
 asv.conf.json                                               | 6 +++---
 .../brainglobe_benchmarks/test_cellfinder.py                | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/asv.conf.json b/asv.conf.json
index d620a545..11837ae2 100644
--- a/asv.conf.json
+++ b/asv.conf.json
@@ -146,7 +146,7 @@
 
     // The directory (relative to the current directory) that benchmarks are
     // stored in.  If not provided, defaults to "benchmarks"
-    // "benchmark_dir": "benchmarks",
+    "benchmark_dir": "brainglobe_benchmarks",
 
     // The directory (relative to the current directory) to cache the Python
     // environments in.  If not provided, defaults to "env"
@@ -154,11 +154,11 @@
 
     // The directory (relative to the current directory) that raw benchmark
     // results are stored in.  If not provided, defaults to "results".
-    "results_dir": "benchmarks/results",
+    "results_dir": "brainglobe_benchmarks/results",
 
     // The directory (relative to the current directory) that the html tree
     // should be written to.  If not provided, defaults to "html".
-    "html_dir": "benchmarks/html",
+    "html_dir": "brainglobe_benchmarks/html",
 
     // The number of characters to retain in the commit hashes.
     // "hash_length": 8,
diff --git a/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py b/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
index ab204764..a69270c3 100644
--- a/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
@@ -23,7 +23,7 @@ def asv_config_monkeypatched_path(tmp_path):
         _description_
     """
     # read reference asv config
-    asv_original_path = Path(__file__).resolve().parents[2] / "asv.conf.json"
+    asv_original_path = Path(__file__).resolve().parents[3] / "asv.conf.json"
     asv_monkeypatched_dict = util.load_json(
         asv_original_path, js_comments=True
     )

From 9b69d1c63709c7520417bd139a0355c067ca2fa5 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 16:39:34 +0000
Subject: [PATCH 35/54] mark benchmark tests as skip for now

---
 asv.conf.json                                          |  6 +++---
 .../brainglobe_benchmarks/test_cellfinder.py           | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/asv.conf.json b/asv.conf.json
index 11837ae2..733385d5 100644
--- a/asv.conf.json
+++ b/asv.conf.json
@@ -146,7 +146,7 @@
 
     // The directory (relative to the current directory) that benchmarks are
     // stored in.  If not provided, defaults to "benchmarks"
-    "benchmark_dir": "brainglobe_benchmarks",
+    "benchmark_dir": "benchmarks",
 
     // The directory (relative to the current directory) to cache the Python
     // environments in.  If not provided, defaults to "env"
@@ -154,11 +154,11 @@
 
     // The directory (relative to the current directory) that raw benchmark
     // results are stored in.  If not provided, defaults to "results".
-    "results_dir": "brainglobe_benchmarks/results",
+    "results_dir": "benchmarks/results",
 
     // The directory (relative to the current directory) that the html tree
     // should be written to.  If not provided, defaults to "html".
-    "html_dir": "brainglobe_benchmarks/html",
+    "html_dir": "benchmarks/html",
 
     // The number of characters to retain in the commit hashes.
     // "hash_length": 8,
diff --git a/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py b/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
index a69270c3..81b716dc 100644
--- a/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
@@ -27,8 +27,6 @@ def asv_config_monkeypatched_path(tmp_path):
     asv_monkeypatched_dict = util.load_json(
         asv_original_path, js_comments=True
     )
-    # with open(asv_original_path) as asv_config:
-    #     asv_monkeypatched_dict = json.load(asv_config)
 
     # change directories
     for ky in ["env_dir", "results_dir", "html_dir"]:
@@ -57,6 +55,7 @@ def asv_config_monkeypatched_path(tmp_path):
     return str(asv_monkeypatched_path)
 
 
+@pytest.mark.skip(reason="will be worked on a separate PR")
 def test_run_benchmarks(asv_config_monkeypatched_path):
     # --- ideally monkeypatch an asv config so that results are in tmp_dir?
 
@@ -84,13 +83,14 @@ def test_run_benchmarks(asv_config_monkeypatched_path):
         ],
         cwd=str(
             Path(asv_config_monkeypatched_path).parent
-        ),  # ---> from where asv config is
+        ),  # run from where asv config is
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         text=True,
         encoding="utf-8",
-    )  # STDOUT: "· Cloning project\n· Fetching recent changes\n·
-    # Creating environments\n· No __init__.py file in 'benchmarks'\n"?
+    )
+    # STDOUT: "· Cloning project\n· Fetching recent changes\n·
+    # Creating environments\n· No __init__.py file in 'benchmarks'\n"
 
     # check returncode
     assert asv_benchmark_output.returncode == 0

From 2bc55b4e043c60c303709a09f297b8a3c2b30050 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 17:07:23 +0000
Subject: [PATCH 36/54] replace typer by argparse (WIP)

---
 brainglobe_workflows/cellfinder.py            | 19 ++++---
 brainglobe_workflows/utils.py                 | 49 +++++++++++++++++++
 pyproject.toml                                |  3 +-
 .../brainglobe_workflows/test_cellfinder.py   |  2 +-
 4 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/brainglobe_workflows/cellfinder.py b/brainglobe_workflows/cellfinder.py
index 1e6db94b..df44a92a 100644
--- a/brainglobe_workflows/cellfinder.py
+++ b/brainglobe_workflows/cellfinder.py
@@ -22,12 +22,12 @@
 import json
 import logging
 import os
+import sys
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Optional, Tuple, Union
 
 import pooch
-import typer
 from brainglobe_utils.IO.cells import save_cells
 from cellfinder_core.main import main as cellfinder_run
 from cellfinder_core.tools.IO import read_with_dask
@@ -35,6 +35,7 @@
 
 from brainglobe_workflows.utils import (
     DEFAULT_JSON_CONFIG_PATH_CELLFINDER,
+    config_parser,
     setup_logger,
 )
 from brainglobe_workflows.utils import __name__ as LOGGER_NAME
@@ -356,9 +357,15 @@ def run_workflow_from_cellfinder_run(cfg: CellfinderConfig):
     )
 
 
-def main(config=DEFAULT_JSON_CONFIG_PATH_CELLFINDER):
+def main(
+    argv: Optional[list[str]] = None,
+):
+    # parse CLI arguments
+    argv = argv or sys.argv[1:]  # sys.argv[0] is the script name
+    args = config_parser(argv, str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER))
+
     # run setup
-    cfg = setup(config)
+    cfg = setup(args.config)
 
     # run workflow
     run_workflow_from_cellfinder_run(cfg)  # only this will be benchmarked
@@ -366,10 +373,6 @@ def main(config=DEFAULT_JSON_CONFIG_PATH_CELLFINDER):
     return cfg
 
 
-def app_wrapper():
-    typer.run(main)
-
-
 if __name__ == "__main__":
     # run setup and workflow
-    app_wrapper()
+    main()
diff --git a/brainglobe_workflows/utils.py b/brainglobe_workflows/utils.py
index 94d3fa14..8daf83fd 100644
--- a/brainglobe_workflows/utils.py
+++ b/brainglobe_workflows/utils.py
@@ -1,4 +1,5 @@
 # import argparse
+import argparse
 import logging
 import sys
 from pathlib import Path
@@ -35,3 +36,51 @@ def setup_logger() -> logging.Logger:
     logger.setLevel(logging.DEBUG)
     logger.addHandler(console_handler)
     return logger
+
+
+def config_parser(argv_: list[str], default_config: str) -> argparse.Namespace:
+    """Define argument parser for cellfinder
+    workflow script.
+
+    It expects a path to a json file with the
+    parameters required to run the workflow.
+    If none is provided, the default
+
+    Parameters
+    ----------
+    argv_ : list[str]
+        List of strings to parse
+    default_config : str
+        path to default config if none is passed as a CLI argument
+
+    Returns
+    -------
+    args : argparse.Namespace
+        command line input arguments parsed
+    """
+
+    # initialise argument parser
+    parser = argparse.ArgumentParser(
+        description=(
+            "To launch the workflow with "
+            "a specific set of input parameters, run: "
+            "`python brainglobe_workflows/cellfinder.py "
+            "--config path/to/config.json`"
+            "where path/to/input/config.json is the json file "
+            "containing the workflow parameters."
+        )
+    )
+    # add arguments
+    parser.add_argument(
+        "-c",
+        "--config",
+        default=default_config,
+        type=str,
+        metavar="CONFIG",  # a name for usage messages
+        help="",
+    )
+
+    # build parser object
+    args = parser.parse_args(argv_)
+
+    return args
diff --git a/pyproject.toml b/pyproject.toml
index 3bb1656b..5263fb21 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,6 @@ dependencies = [
     "asv",
     "pooch",
     "cellfinder-core",
-    "typer"
 ]
 license = {text = "BSD-3-Clause"}
 
@@ -45,7 +44,7 @@ dev = [
 ]
 
 [project.scripts]
-cellfinder-workflow = "brainglobe_workflows.cellfinder:app_wrapper"
+cellfinder-workflow = "brainglobe_workflows.cellfinder:main"
 
 [build-system]
 requires = [
diff --git a/tests/test_integration/brainglobe_workflows/test_cellfinder.py b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
index a61f2316..05a19bca 100644
--- a/tests/test_integration/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
@@ -44,7 +44,7 @@ def test_main(
     if not input_config:
         cfg = main()  # use default config
     else:
-        cfg = main(str(request.getfixturevalue(input_config)))
+        cfg = main(["--config", str(request.getfixturevalue(input_config))])
 
     # check output files exist
     assert (cfg.detected_cells_path).is_file()

From ea8e1ee1f09bbb1e642f6cf9f40c1c803e22e537 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 17:23:35 +0000
Subject: [PATCH 37/54] add test for config parser

---
 .../brainglobe_workflows/test_utils.py        | 20 ++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tests/test_unit/brainglobe_workflows/test_utils.py b/tests/test_unit/brainglobe_workflows/test_utils.py
index 12cbfa7b..ffea5ba6 100644
--- a/tests/test_unit/brainglobe_workflows/test_utils.py
+++ b/tests/test_unit/brainglobe_workflows/test_utils.py
@@ -1,6 +1,12 @@
 import logging
 
-from brainglobe_workflows.utils import setup_logger
+import pytest
+
+from brainglobe_workflows.utils import (
+    DEFAULT_JSON_CONFIG_PATH_CELLFINDER,
+    config_parser,
+    setup_logger,
+)
 
 
 def test_setup_logger(custom_logger_name: str):
@@ -17,3 +23,15 @@ def test_setup_logger(custom_logger_name: str):
     assert logger.name == custom_logger_name
     assert logger.hasHandlers()
     assert logger.handlers[0].name == "console_handler"
+
+
+@pytest.mark.parametrize(
+    "list_input_args",
+    [[], ["--config", str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)]],
+)
+def test_config_parser(list_input_args: list[str]):
+    args = config_parser(
+        list_input_args, str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)
+    )
+
+    assert args.config

From fde2f6ed108aa9ed18174dcd76a87d1201836ae4 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 18:09:35 +0000
Subject: [PATCH 38/54] argparse fix but not the best

---
 brainglobe_workflows/cellfinder.py            | 11 +++++---
 brainglobe_workflows/utils.py                 | 25 ++++++++++++-------
 .../brainglobe_workflows/test_cellfinder.py   |  6 ++---
 .../brainglobe_workflows/test_utils.py        |  5 ++--
 4 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/brainglobe_workflows/cellfinder.py b/brainglobe_workflows/cellfinder.py
index df44a92a..0c5741b3 100644
--- a/brainglobe_workflows/cellfinder.py
+++ b/brainglobe_workflows/cellfinder.py
@@ -358,11 +358,16 @@ def run_workflow_from_cellfinder_run(cfg: CellfinderConfig):
 
 
 def main(
-    argv: Optional[list[str]] = None,
+    argv_: Optional[list[str]] = None,
 ):
     # parse CLI arguments
-    argv = argv or sys.argv[1:]  # sys.argv[0] is the script name
-    args = config_parser(argv, str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER))
+    # the following line will take sys.argv in most cases except
+    # for testing when we monkeypatch it
+    if argv_ is not None:
+        argv = argv_
+    else:
+        argv = sys.argv[1:]  # sys.argv[0] is the script name
+    args = config_parser(str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER), argv)
 
     # run setup
     cfg = setup(args.config)
diff --git a/brainglobe_workflows/utils.py b/brainglobe_workflows/utils.py
index 8daf83fd..4f34746f 100644
--- a/brainglobe_workflows/utils.py
+++ b/brainglobe_workflows/utils.py
@@ -3,6 +3,7 @@
 import logging
 import sys
 from pathlib import Path
+from typing import Optional
 
 DEFAULT_JSON_CONFIGS_PATH = Path(__file__).resolve().parent / "configs"
 
@@ -38,20 +39,24 @@ def setup_logger() -> logging.Logger:
     return logger
 
 
-def config_parser(argv_: list[str], default_config: str) -> argparse.Namespace:
-    """Define argument parser for cellfinder
-    workflow script.
+def config_parser(
+    default_config: str,
+    argv_: Optional[list[str]] = None,
+) -> argparse.Namespace:
+    """Define argument parser for a workflow script.
 
-    It expects a path to a json file with the
-    parameters required to run the workflow.
-    If none is provided, the default
+    The only CLI argument defined in the parser is
+    the input config file. The default config to use if
+    no config is passed must be passed as an input to this
+    function. The list of input arguments `arg_v` can be
+    an empty list or None.
 
     Parameters
     ----------
-    argv_ : list[str]
-        List of strings to parse
     default_config : str
-        path to default config if none is passed as a CLI argument
+        _description_
+    argv_ : Optional[list[str]], optional
+        _description_, by default None
 
     Returns
     -------
@@ -81,6 +86,8 @@ def config_parser(argv_: list[str], default_config: str) -> argparse.Namespace:
     )
 
     # build parser object
+    if not argv_:
+        argv_ = []
     args = parser.parse_args(argv_)
 
     return args
diff --git a/tests/test_integration/brainglobe_workflows/test_cellfinder.py b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
index 05a19bca..01d28300 100644
--- a/tests/test_integration/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
@@ -42,7 +42,7 @@ def test_main(
 
     # run main
     if not input_config:
-        cfg = main()  # use default config
+        cfg = main([])  # use default config
     else:
         cfg = main(["--config", str(request.getfixturevalue(input_config))])
 
@@ -58,7 +58,7 @@ def test_main(
         "input_config_fetch_local",
     ],
 )
-def test_app_wrapper(
+def test_script(
     input_config: Optional[str],
     monkeypatch: pytest.MonkeyPatch,
     tmp_path: Path,
@@ -118,7 +118,7 @@ def test_app_wrapper(
         "input_config_fetch_local",
     ],
 )
-def test_main_entry_point(
+def test_entry_point(
     input_config: Optional[str],
     monkeypatch: pytest.MonkeyPatch,
     tmp_path: Path,
diff --git a/tests/test_unit/brainglobe_workflows/test_utils.py b/tests/test_unit/brainglobe_workflows/test_utils.py
index ffea5ba6..d0d1156c 100644
--- a/tests/test_unit/brainglobe_workflows/test_utils.py
+++ b/tests/test_unit/brainglobe_workflows/test_utils.py
@@ -27,11 +27,12 @@ def test_setup_logger(custom_logger_name: str):
 
 @pytest.mark.parametrize(
     "list_input_args",
-    [[], ["--config", str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)]],
+    [[], ["--config", str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)], None],
 )
 def test_config_parser(list_input_args: list[str]):
     args = config_parser(
-        list_input_args, str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)
+        str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER),
+        list_input_args,
     )
 
     assert args.config

From 69715483679ea29adcfc7bc7f63128c0a3a671fc Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 18:17:33 +0000
Subject: [PATCH 39/54] change parser and test to always receive a list

---
 brainglobe_workflows/utils.py                 | 21 +++++++++----------
 .../brainglobe_workflows/test_utils.py        |  4 ++--
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/brainglobe_workflows/utils.py b/brainglobe_workflows/utils.py
index 4f34746f..9e5f1da7 100644
--- a/brainglobe_workflows/utils.py
+++ b/brainglobe_workflows/utils.py
@@ -3,7 +3,6 @@
 import logging
 import sys
 from pathlib import Path
-from typing import Optional
 
 DEFAULT_JSON_CONFIGS_PATH = Path(__file__).resolve().parent / "configs"
 
@@ -40,23 +39,25 @@ def setup_logger() -> logging.Logger:
 
 
 def config_parser(
+    argv: list[str],
     default_config: str,
-    argv_: Optional[list[str]] = None,
 ) -> argparse.Namespace:
     """Define argument parser for a workflow script.
 
     The only CLI argument defined in the parser is
-    the input config file. The default config to use if
-    no config is passed must be passed as an input to this
-    function. The list of input arguments `arg_v` can be
-    an empty list or None.
+    the input config file. The list of input arguments
+    `argv` can be an empty list.
+
+    Both the list of input arguments and the default config to use if
+    no config is specified must be passed as an input to this
+    function.
 
     Parameters
     ----------
+    argv_ : list[str]
+        _description_
     default_config : str
         _description_
-    argv_ : Optional[list[str]], optional
-        _description_, by default None
 
     Returns
     -------
@@ -86,8 +87,6 @@ def config_parser(
     )
 
     # build parser object
-    if not argv_:
-        argv_ = []
-    args = parser.parse_args(argv_)
+    args = parser.parse_args(argv)
 
     return args
diff --git a/tests/test_unit/brainglobe_workflows/test_utils.py b/tests/test_unit/brainglobe_workflows/test_utils.py
index d0d1156c..e4616cdb 100644
--- a/tests/test_unit/brainglobe_workflows/test_utils.py
+++ b/tests/test_unit/brainglobe_workflows/test_utils.py
@@ -27,12 +27,12 @@ def test_setup_logger(custom_logger_name: str):
 
 @pytest.mark.parametrize(
     "list_input_args",
-    [[], ["--config", str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)], None],
+    [[], ["--config", str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)]],
 )
 def test_config_parser(list_input_args: list[str]):
     args = config_parser(
-        str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER),
         list_input_args,
+        str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER),
     )
 
     assert args.config

From 36676cf2f6be3e71b1d5da51777e17df71eff680 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 7 Dec 2023 18:35:22 +0000
Subject: [PATCH 40/54] add default to main and move parser out of main

---
 brainglobe_workflows/cellfinder.py            | 21 +++++++------------
 .../brainglobe_workflows/test_cellfinder.py   |  4 ++--
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/brainglobe_workflows/cellfinder.py b/brainglobe_workflows/cellfinder.py
index 0c5741b3..852e3ae1 100644
--- a/brainglobe_workflows/cellfinder.py
+++ b/brainglobe_workflows/cellfinder.py
@@ -357,20 +357,9 @@ def run_workflow_from_cellfinder_run(cfg: CellfinderConfig):
     )
 
 
-def main(
-    argv_: Optional[list[str]] = None,
-):
-    # parse CLI arguments
-    # the following line will take sys.argv in most cases except
-    # for testing when we monkeypatch it
-    if argv_ is not None:
-        argv = argv_
-    else:
-        argv = sys.argv[1:]  # sys.argv[0] is the script name
-    args = config_parser(str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER), argv)
-
+def main(input_config: str = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)):
     # run setup
-    cfg = setup(args.config)
+    cfg = setup(input_config)
 
     # run workflow
     run_workflow_from_cellfinder_run(cfg)  # only this will be benchmarked
@@ -379,5 +368,9 @@ def main(
 
 
 if __name__ == "__main__":
+    # parse CLI arguments
+    argv = sys.argv[1:]  # sys.argv[0] is the script name
+    args = config_parser(argv, str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER))
+
     # run setup and workflow
-    main()
+    main(args.config)
diff --git a/tests/test_integration/brainglobe_workflows/test_cellfinder.py b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
index 01d28300..4dd626cc 100644
--- a/tests/test_integration/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
@@ -42,9 +42,9 @@ def test_main(
 
     # run main
     if not input_config:
-        cfg = main([])  # use default config
+        cfg = main()
     else:
-        cfg = main(["--config", str(request.getfixturevalue(input_config))])
+        cfg = main(str(request.getfixturevalue(input_config)))
 
     # check output files exist
     assert (cfg.detected_cells_path).is_file()

From 0327e412d5ef2e49b9cd85d545097ca23995830e Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 8 Dec 2023 11:00:37 +0000
Subject: [PATCH 41/54] fix typing of List

---
 brainglobe_workflows/utils.py                      | 5 +++--
 tests/test_unit/brainglobe_workflows/test_utils.py | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/brainglobe_workflows/utils.py b/brainglobe_workflows/utils.py
index 9e5f1da7..c69abcad 100644
--- a/brainglobe_workflows/utils.py
+++ b/brainglobe_workflows/utils.py
@@ -3,6 +3,7 @@
 import logging
 import sys
 from pathlib import Path
+from typing import List
 
 DEFAULT_JSON_CONFIGS_PATH = Path(__file__).resolve().parent / "configs"
 
@@ -39,7 +40,7 @@ def setup_logger() -> logging.Logger:
 
 
 def config_parser(
-    argv: list[str],
+    argv: List[str],
     default_config: str,
 ) -> argparse.Namespace:
     """Define argument parser for a workflow script.
@@ -54,7 +55,7 @@ def config_parser(
 
     Parameters
     ----------
-    argv_ : list[str]
+    argv_ : List[str]
         _description_
     default_config : str
         _description_
diff --git a/tests/test_unit/brainglobe_workflows/test_utils.py b/tests/test_unit/brainglobe_workflows/test_utils.py
index e4616cdb..2ec8d19e 100644
--- a/tests/test_unit/brainglobe_workflows/test_utils.py
+++ b/tests/test_unit/brainglobe_workflows/test_utils.py
@@ -1,4 +1,5 @@
 import logging
+from typing import List
 
 import pytest
 
@@ -29,7 +30,7 @@ def test_setup_logger(custom_logger_name: str):
     "list_input_args",
     [[], ["--config", str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)]],
 )
-def test_config_parser(list_input_args: list[str]):
+def test_config_parser(list_input_args: List[str]):
     args = config_parser(
         list_input_args,
         str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER),

From 0614e8971bf59b428cc1481a11ff658ba13364bd Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 8 Dec 2023 12:49:02 +0000
Subject: [PATCH 42/54] add init file for benchmarks

---
 tests/test_integration/brainglobe_benchmarks/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 tests/test_integration/brainglobe_benchmarks/__init__.py

diff --git a/tests/test_integration/brainglobe_benchmarks/__init__.py b/tests/test_integration/brainglobe_benchmarks/__init__.py
new file mode 100644
index 00000000..e69de29b

From 3b66b3e697e81a78898da158b9d96c1638532adb Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 8 Dec 2023 12:49:21 +0000
Subject: [PATCH 43/54] reorganise fixtures in conftest

---
 tests/conftest.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 6550b7a8..c40b4517 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -21,6 +21,21 @@ def input_configs_dir() -> Path:
     return Path(__file__).parent / "data"
 
 
+@pytest.fixture(scope="session")
+def cellfinder_GIN_data() -> dict:
+    """Return the URL and hash to the GIN repository with the input data
+
+    Returns
+    -------
+    dict
+        URL and hash of the GIN repository with the cellfinder test data
+    """
+    return {
+        "url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
+        "hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",  # noqa
+    }
+
+
 @pytest.fixture()
 def input_config_fetch_GIN(input_configs_dir: Path) -> Path:
     """
@@ -39,21 +54,6 @@ def input_config_fetch_GIN(input_configs_dir: Path) -> Path:
     return input_configs_dir / "input_data_GIN.json"
 
 
-@pytest.fixture(scope="session")
-def cellfinder_GIN_data() -> dict:
-    """Return the URL and hash to the GIN repository with the input data
-
-    Returns
-    -------
-    dict
-        URL and hash of the GIN repository with the cellfinder test data
-    """
-    return {
-        "url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
-        "hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914",  # noqa
-    }
-
-
 @pytest.fixture()
 def input_config_fetch_local(
     input_configs_dir: Path,

From 25abb7bb674b9c6c755701d4f5369d05f0d3bebf Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 8 Dec 2023 16:07:12 +0000
Subject: [PATCH 44/54] add wrapper fn for entrypoint

---
 brainglobe_workflows/cellfinder.py                 | 14 ++++++++++----
 pyproject.toml                                     |  5 +----
 .../brainglobe_workflows/test_cellfinder.py        |  8 --------
 3 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/brainglobe_workflows/cellfinder.py b/brainglobe_workflows/cellfinder.py
index 852e3ae1..c1d1aeea 100644
--- a/brainglobe_workflows/cellfinder.py
+++ b/brainglobe_workflows/cellfinder.py
@@ -367,10 +367,16 @@ def main(input_config: str = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)):
     return cfg
 
 
-if __name__ == "__main__":
+def main_app_wrapper():
     # parse CLI arguments
-    argv = sys.argv[1:]  # sys.argv[0] is the script name
-    args = config_parser(argv, str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER))
+    args = config_parser(
+        sys.argv[1:],  # sys.argv[0] is the script name
+        str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER),
+    )
 
     # run setup and workflow
-    main(args.config)
+    _ = main(args.config)
+
+
+if __name__ == "__main__":
+    main_app_wrapper()
diff --git a/pyproject.toml b/pyproject.toml
index 5263fb21..cd297213 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,7 +44,7 @@ dev = [
 ]
 
 [project.scripts]
-cellfinder-workflow = "brainglobe_workflows.cellfinder:main"
+cellfinder-workflow = "brainglobe_workflows.cellfinder:main_app_wrapper"
 
 [build-system]
 requires = [
@@ -61,9 +61,6 @@ include-package-data = true
 include = ["brainglobe_workflows*"]
 exclude = ["tests*"]
 
-[tool.pytest.ini_options]
-addopts = "--cov=brainglobe_workflows"
-
 [tool.black]
 target-version = ['py38', 'py39', 'py310']
 skip-string-normalization = false
diff --git a/tests/test_integration/brainglobe_workflows/test_cellfinder.py b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
index 4dd626cc..6e60ee28 100644
--- a/tests/test_integration/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
@@ -100,10 +100,6 @@ def test_script(
     # run workflow script from the CLI
     subprocess_output = subprocess.run(
         subprocess_input,
-        # stdout=subprocess.PIPE,
-        # stderr=subprocess.STDOUT,
-        # text=True,
-        # encoding="utf-8",
     )
 
     # check returncode
@@ -152,10 +148,6 @@ def test_entry_point(
     # run workflow with no CLI arguments,
     subprocess_output = subprocess.run(
         subprocess_input,
-        # stdout=subprocess.PIPE,
-        # stderr=subprocess.STDOUT,
-        # text=True,
-        # encoding="utf-8",
     )
 
     # check returncode

From 8a269a37b5084fb7d9d847daf817facf81d7d617 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 11 Dec 2023 14:06:50 +0000
Subject: [PATCH 45/54] edit asv to fetch repo from this branch (for benchmarks
 PR)

---
 asv.conf.json | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/asv.conf.json b/asv.conf.json
index 733385d5..e030cd63 100644
--- a/asv.conf.json
+++ b/asv.conf.json
@@ -11,7 +11,8 @@
 
     // The URL or local path of the source code repository for the
     // project being benchmarked
-    "repo": ".",
+    // "repo": ".",
+    "repo": "https://github.com/brainglobe/brainglobe-workflows",
 
     // The Python project's subdirectory in your repo.  If missing or
     // the empty string, the project is assumed to be located at the root
@@ -39,14 +40,14 @@
 
     // List of branches to benchmark. If not provided, defaults to "master"
     // (for git) or "default" (for mercurial).
-    "branches": ["main"], // for git
+    "branches": ["smg/tests-refactor"], // for git
     // "branches": ["default"],    // for mercurial
 
     // The DVCS being used.  If not set, it will be automatically
     // determined from "repo" by looking at the protocol in the URL
     // (if remote), or by looking for special directories, such as
     // ".git" (if local).
-    // "dvcs": "git",
+    "dvcs": "git",
 
     // The tool to use to create environments.  May be "conda",
     // "virtualenv", "mamba" (above 3.8)

From a30c28b5fe31ccafd6d5b9ede973be7e26d9f38e Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 11 Dec 2023 14:14:56 +0000
Subject: [PATCH 46/54] edit paths to asv config (for benchmarks PR)

---
 asv.conf.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/asv.conf.json b/asv.conf.json
index e030cd63..8ce9490d 100644
--- a/asv.conf.json
+++ b/asv.conf.json
@@ -147,7 +147,7 @@
 
     // The directory (relative to the current directory) that benchmarks are
     // stored in.  If not provided, defaults to "benchmarks"
-    "benchmark_dir": "benchmarks",
+    "benchmark_dir": "brainglobe_benchmarks",
 
     // The directory (relative to the current directory) to cache the Python
     // environments in.  If not provided, defaults to "env"
@@ -155,11 +155,11 @@
 
     // The directory (relative to the current directory) that raw benchmark
     // results are stored in.  If not provided, defaults to "results".
-    "results_dir": "benchmarks/results",
+    "results_dir": "brainglobe_benchmarks/results",
 
     // The directory (relative to the current directory) that the html tree
     // should be written to.  If not provided, defaults to "html".
-    "html_dir": "benchmarks/html",
+    "html_dir": "brainglobe_benchmarks/html",
 
     // The number of characters to retain in the commit hashes.
     // "hash_length": 8,

From 9a8a4303fdd6f56d77df36932e9624cf10b991a6 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 11 Dec 2023 17:34:43 +0000
Subject: [PATCH 47/54] small edits from PR review comments

---
 brainglobe_workflows/cellfinder.py | 74 +++++++++++++++++++++++-------
 tests/conftest.py                  |  4 +-
 2 files changed, 60 insertions(+), 18 deletions(-)

diff --git a/brainglobe_workflows/cellfinder.py b/brainglobe_workflows/cellfinder.py
index c1d1aeea..d80819c5 100644
--- a/brainglobe_workflows/cellfinder.py
+++ b/brainglobe_workflows/cellfinder.py
@@ -131,15 +131,18 @@ def add_signal_and_background_files(
 ) -> CellfinderConfig:
     """
     Adds the lists of input data files (signal and background)
-    to the config, when these are not defined.
+    to the config.
 
-    It first checks if the input data exists locally.
-    - If both directories (signal and background) exist, the lists of
-    signal and background files are added to the config.
-    - If exactly one of the input data directories is missing, an error
+    These files are first searched locally. If not found, we
+    attempt to download them from GIN.
+
+    Specifically:
+    - If both parent data directories (signal and background) exist locally,
+    the lists of signal and background files are added to the config.
+    - If exactly one of the parent data directories is missing, an error
     message is logged.
     - If neither of them exist, the data is retrieved from the provided GIN
-    repository. If no URL or hash to GIN is provided, an error is shown.
+    repository. If no URL or hash to GIN is provided, an error is thrown.
 
     Parameters
     ----------
@@ -192,14 +195,8 @@ def add_signal_and_background_files(
     # If neither of the input data directories exist,
     # retrieve data from GIN repository and add list of files to config
     else:
-        # check if GIN URL and hash are defined (log error otherwise)
-        if (not config.data_url) or (not config.data_hash):
-            logger.error(
-                "Input data not found locally, and URL/hash to "
-                "GIN repository not provided"
-            )
-
-        else:
+        # Check if GIN URL and hash are defined (log error otherwise)
+        if config.data_url and config.data_hash:
             # get list of files in GIN archive with pooch.retrieve
             list_files_archive = pooch.retrieve(
                 url=config.data_url,
@@ -235,6 +232,12 @@ def add_signal_and_background_files(
                     str(Path(config.background_dir_path).resolve())
                 )
             ]
+        # If one of URL/hash to GIN repo not defined, throw an error
+        else:
+            logger.error(
+                "Input data not found locally, and URL/hash to "
+                "GIN repository not provided"
+            )
 
     return config
 
@@ -278,7 +281,7 @@ def setup_workflow(input_config_path: Path) -> CellfinderConfig:
         logger.info("Using default config file")
 
     # Add lists of input data files to the config,
-    # if these are defined yet
+    # if these are not defined yet
     if not (config.list_signal_files and config.list_background_files):
         # build fullpaths to input directories
         config.signal_dir_path = str(
@@ -301,7 +304,10 @@ def setup_workflow(input_config_path: Path) -> CellfinderConfig:
     output_path_timestamped = Path(config.install_path) / (
         str(config.output_path_basename_relative) + timestamp_formatted
     )
-    output_path_timestamped.mkdir(parents=True, exist_ok=True)
+    output_path_timestamped.mkdir(
+        parents=True,  # create any missing parents
+        exist_ok=True,  # ignore FileExistsError exceptions
+    )
 
     # Add output path and output file path to config
     config.output_path = output_path_timestamped
@@ -357,7 +363,29 @@ def run_workflow_from_cellfinder_run(cfg: CellfinderConfig):
     )
 
 
-def main(input_config: str = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)):
+def main(
+    input_config: str = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER),
+) -> CellfinderConfig:
+    """
+    Setup and run cellfinder workflow.
+
+    This function runs the setup steps required
+    to run the cellfinder workflow, and the
+    workflow itself. Note that only the workflow
+    will be benchmarked.
+
+    Parameters
+    ----------
+    input_config : str, optional
+        Absolute path to input config file,
+        by default str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)
+
+    Returns
+    -------
+    cfg : CellfinderConfig
+        a class with the required setup methods and parameters for
+        the cellfinder workflow
+    """
     # run setup
     cfg = setup(input_config)
 
@@ -368,6 +396,18 @@ def main(input_config: str = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)):
 
 
 def main_app_wrapper():
+    """
+    Parse command line arguments and
+    run cellfinder setup and workflow
+
+    This function is used to define an entry-point,
+    that allows the user to run the cellfinder workflow
+    for a given input config file as:
+    `cellfinder-workflow --config <path-to-input-config>`.
+
+    If no input config file is provided, the default is used.
+
+    """
     # parse CLI arguments
     args = config_parser(
         sys.argv[1:],  # sys.argv[0] is the script name
diff --git a/tests/conftest.py b/tests/conftest.py
index c40b4517..bad9a3da 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -63,7 +63,9 @@ def input_config_fetch_local(
     Download the cellfinder data locally and return the config json
     file for fetching data locally.
 
-    The data is downloaded to a directory relative to cwd
+    The data is downloaded to a directory under the current working
+    directory (that is, to a directory under the directory from where
+    pytest is launched).
 
     Parameters
     ----------

From 429f9703ee638e4dadbfa2c5e527b0f6f3257c5f Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 11 Dec 2023 17:48:48 +0000
Subject: [PATCH 48/54] Apply suggestions from code review

Co-authored-by: Alessandro Felder <alessandrofelder@users.noreply.github.com>
---
 brainglobe_workflows/cellfinder.py | 14 ++++++--------
 brainglobe_workflows/utils.py      |  9 +++++----
 tests/conftest.py                  |  4 ++--
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/brainglobe_workflows/cellfinder.py b/brainglobe_workflows/cellfinder.py
index d80819c5..ec6dfa60 100644
--- a/brainglobe_workflows/cellfinder.py
+++ b/brainglobe_workflows/cellfinder.py
@@ -110,13 +110,13 @@ def read_cellfinder_config(input_config_path: Path):
 
     Parameters
     ----------
-    input_config_path : _type_
-        _description_
+    input_config_path : Path
+        Absolute path to a cellfinder config file
 
     Returns
     -------
-    _type_
-        _description_
+    CellfinderConfig:
+        The cellfinder config object, populated with data from the input
     """
     # read input config
     with open(input_config_path) as cfg:
@@ -147,14 +147,12 @@ def add_signal_and_background_files(
     Parameters
     ----------
     config : CellfinderConfig
-        a dataclass whose attributes are the parameters
-        for running cellfinder.
+        a cellfinder config with input data files to be validated
 
     Returns
     -------
     config : CellfinderConfig
-        a dataclass whose attributes are the parameters
-        for running cellfinder.
+        a cellfinder config with updated input data lists.
     """
     # Fetch logger
     logger = logging.getLogger(LOGGER_NAME)
diff --git a/brainglobe_workflows/utils.py b/brainglobe_workflows/utils.py
index c69abcad..a0725793 100644
--- a/brainglobe_workflows/utils.py
+++ b/brainglobe_workflows/utils.py
@@ -1,4 +1,3 @@
-# import argparse
 import argparse
 import logging
 import sys
@@ -13,16 +12,18 @@
 
 
 def setup_logger() -> logging.Logger:
-    """Setup a logger for this script
+    """Setup a logger for workflow runs
 
     The logger's level is set to DEBUG, and it
     is linked to a handler that writes to the
-    console
+    console. This utility function helps run 
+    workflows, and test their logs, in a 
+    consistent way.
 
     Returns
     -------
     logging.Logger
-        a logger object
+        a logger object configured for workflow runs
     """
     # define handler that writes to stdout
     console_handler = logging.StreamHandler(sys.stdout)
diff --git a/tests/conftest.py b/tests/conftest.py
index bad9a3da..83e05553 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -39,7 +39,7 @@ def cellfinder_GIN_data() -> dict:
 @pytest.fixture()
 def input_config_fetch_GIN(input_configs_dir: Path) -> Path:
     """
-    Return the config json file for fetching data from GIN
+    Return the cellfinder config json file that is configured to fetch from GIN
 
     Parameters
     ----------
@@ -61,7 +61,7 @@ def input_config_fetch_local(
 ) -> Path:
     """
     Download the cellfinder data locally and return the config json
-    file for fetching data locally.
+    file configured to fetch local data.
 
     The data is downloaded to a directory under the current working
     directory (that is, to a directory under the directory from where

From 2913946f3718911e05eec994b2a57eb45e01d1d4 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 11 Dec 2023 17:52:54 +0000
Subject: [PATCH 49/54] add teardown comment to benchmark test (currently
 skipped)

---
 tests/test_integration/brainglobe_benchmarks/test_cellfinder.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py b/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
index 81b716dc..99cd674a 100644
--- a/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
@@ -98,3 +98,5 @@ def test_run_benchmarks(asv_config_monkeypatched_path):
     # check logs?
 
     # delete directories?
+    # check teardown after yield:
+    # https://docs.pytest.org/en/6.2.x/fixture.html#yield-fixtures-recommended

From 6e5a9a8f7954118eee72c5d6106103cea664e95a Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 11 Dec 2023 17:53:58 +0000
Subject: [PATCH 50/54] remove dvcs from monkeypatched asv config in benchmark
 test (currently skipped)

---
 .../brainglobe_benchmarks/test_cellfinder.py                 | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py b/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
index 99cd674a..ce6f5329 100644
--- a/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
@@ -37,10 +37,7 @@ def asv_config_monkeypatched_path(tmp_path):
     # change repo to URL rather than local
     asv_monkeypatched_dict[
         "repo"
-    ] = "https://github.com/brainglobe/brainglobe-workflows/"
-    asv_monkeypatched_dict[
-        "dvcs"
-    ] = "git"  # not sure why I need this with the URL?
+    ] = "https://github.com/brainglobe/brainglobe-workflows.git"
 
     # define path to a temp json file to dump config data
     asv_monkeypatched_path = tmp_path / "asv.conf.json"

From c3974c0b1786b13be3f1f6b91d78fb1e4ea63cff Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 11 Dec 2023 17:55:25 +0000
Subject: [PATCH 51/54] fill in docstring for benchmark test (currently
 skipped)

---
 .../brainglobe_benchmarks/test_cellfinder.py        | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py b/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
index ce6f5329..44d031cc 100644
--- a/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_benchmarks/test_cellfinder.py
@@ -7,20 +7,21 @@
 
 
 @pytest.fixture()
-def asv_config_monkeypatched_path(tmp_path):
-    """Create a monkeypatched asv.conf.json file
-    in tmp_path and return its path
+def asv_config_monkeypatched_path(tmp_path: Path) -> str:
+    """
+    Create a monkeypatched asv.conf.json file
+    in a Pytest-generated temporary directory
+    and return its path
 
     Parameters
     ----------
     tmp_path : Path
         path to pytest-generated temporary directory
 
-
     Returns
     -------
-    _type_
-        _description_
+    str
+        Path to monkeypatched asv config file
     """
     # read reference asv config
     asv_original_path = Path(__file__).resolve().parents[3] / "asv.conf.json"

From 38d15e4a7f554b6aaffd22427605821d71d62f3f Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 11 Dec 2023 17:59:29 +0000
Subject: [PATCH 52/54] mypy fix

---
 tests/test_integration/brainglobe_workflows/test_cellfinder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_integration/brainglobe_workflows/test_cellfinder.py b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
index 6e60ee28..1f179b7b 100644
--- a/tests/test_integration/brainglobe_workflows/test_cellfinder.py
+++ b/tests/test_integration/brainglobe_workflows/test_cellfinder.py
@@ -47,7 +47,7 @@ def test_main(
         cfg = main(str(request.getfixturevalue(input_config)))
 
     # check output files exist
-    assert (cfg.detected_cells_path).is_file()
+    assert Path(cfg.detected_cells_path).is_file()
 
 
 @pytest.mark.parametrize(

From d867f9f25d7aa96c2554dc9af16d681ae0721149 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 11 Dec 2023 18:25:14 +0000
Subject: [PATCH 53/54] fix linting

---
 brainglobe_workflows/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/brainglobe_workflows/utils.py b/brainglobe_workflows/utils.py
index a0725793..4b3bdac3 100644
--- a/brainglobe_workflows/utils.py
+++ b/brainglobe_workflows/utils.py
@@ -16,8 +16,8 @@ def setup_logger() -> logging.Logger:
 
     The logger's level is set to DEBUG, and it
     is linked to a handler that writes to the
-    console. This utility function helps run 
-    workflows, and test their logs, in a 
+    console. This utility function helps run
+    workflows, and test their logs, in a
     consistent way.
 
     Returns

From f17027e6c9d66aed47c5d9db708445158f1a3d59 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Mon, 11 Dec 2023 18:29:40 +0000
Subject: [PATCH 54/54] logger_name in lowercase

---
 tests/test_unit/brainglobe_workflows/conftest.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_unit/brainglobe_workflows/conftest.py b/tests/test_unit/brainglobe_workflows/conftest.py
index 06359c1e..ae85bc53 100644
--- a/tests/test_unit/brainglobe_workflows/conftest.py
+++ b/tests/test_unit/brainglobe_workflows/conftest.py
@@ -10,6 +10,6 @@ def custom_logger_name() -> str:
     str
         Name of custom logger
     """
-    from brainglobe_workflows.utils import __name__ as LOGGER_NAME
+    from brainglobe_workflows.utils import __name__ as logger_name
 
-    return LOGGER_NAME
+    return logger_name