diff --git a/MANIFEST.in b/MANIFEST.in index c1b54649..34cf45e6 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,6 +3,7 @@ include README.md exclude .pre-commit-config.yaml recursive-include brainglobe_workflows *.py +include brainglobe_workflows/cellfinder/default_config.json recursive-exclude * __pycache__ recursive-exclude * *.py[co] diff --git a/brainglobe_workflows/__init__.py b/brainglobe_workflows/__init__.py index 28709be9..00081a03 100644 --- a/brainglobe_workflows/__init__.py +++ b/brainglobe_workflows/__init__.py @@ -1,7 +1,7 @@ from importlib.metadata import PackageNotFoundError, version try: - __version__ = version("brainglobe-scripts") + __version__ = version("brainglobe-workflows") except PackageNotFoundError: # package is not installed pass diff --git a/brainglobe_workflows/cellfinder/__init__.py b/brainglobe_workflows/cellfinder/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/brainglobe_workflows/cellfinder/cellfinder_main.py b/brainglobe_workflows/cellfinder/cellfinder_main.py new file mode 100644 index 00000000..04ec7663 --- /dev/null +++ b/brainglobe_workflows/cellfinder/cellfinder_main.py @@ -0,0 +1,390 @@ +"""This script reproduces the most common cellfinder workflow + +It receives as an (optional) command line input the path to a configuration +json file, that holds the values of the required parameters for the workflow. + +If no input json file is passed as a configuration, the default +configuration defined at brainglobe_workflows/cellfinder/default_config.json +is used. 
+ +Example usage: + - to pass a custom configuration, run (from the cellfinder_main.py + parent directory): + python cellfinder_main.py --config path/to/input/config.json + - to use the default configuration, run + python cellfinder_main.py + + +""" + +import argparse +import datetime +import json +import logging +import os +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Optional, Tuple, Union + +import pooch +from brainglobe_utils.IO.cells import save_cells +from cellfinder_core.main import main as cellfinder_run +from cellfinder_core.tools.IO import read_with_dask +from cellfinder_core.train.train_yml import depth_type + +Pathlike = Union[str, os.PathLike] + +DEFAULT_JSON_CONFIG_PATH = ( + Path(__file__).resolve().parent / "default_config.json" +) + + +@dataclass +class CellfinderConfig: + """ + Define input and output data locations, and the parameters for + the cellfinder preprocessing steps. + """ + + # cellfinder workflows cache directory + install_path: Pathlike + + # cached subdirectory to save data to + extract_dir_relative: Pathlike + signal_subdir: str + background_subdir: str + output_path_basename_relative: Pathlike + detected_cells_filename: Pathlike + + # preprocessing parameters + voxel_sizes: Tuple[float, float, float] + start_plane: int + end_plane: int + trained_model: Optional[ + os.PathLike + ] # if None, it will use a default model + model_weights: Optional[os.PathLike] + model: str + batch_size: int + n_free_cpus: int + network_voxel_sizes: Tuple[int, int, int] + soma_diameter: int + ball_xy_size: int + ball_z_size: int + ball_overlap_fraction: float + log_sigma_size: float + n_sds_above_mean_thresh: int + soma_spread_factor: float + max_cluster_size: int + cube_width: int + cube_height: int + cube_depth: int + network_depth: depth_type + + # origin of data to download (if required) + data_url: Optional[str] = None + data_hash: Optional[str] = None + + # The following attributes are added + # 
+ during the setup phase of the workflow + list_signal_files: Optional[list] = None + list_background_files: Optional[list] = None + output_path: Pathlike = "" + signal_dir_path: Pathlike = "" + background_dir_path: Pathlike = "" + detected_cells_path: Pathlike = "" + + +def setup_logger() -> logging.Logger: + """Setup a logger for this script + + The logger's level is set to DEBUG, and it + is linked to a handler that writes to the + console (stdout); no level is set on the handler itself. + + Returns + ------- + logging.Logger + a logger object + """ + # define handler that writes to stdout + console_handler = logging.StreamHandler(sys.stdout) + console_format = logging.Formatter("%(name)s %(levelname)s: %(message)s") + console_handler.setFormatter(console_format) + + # define logger and link to handler + logger = logging.getLogger( + __name__ + ) # if imported as a module, the logger is named after the module + logger.setLevel(logging.DEBUG) + logger.addHandler(console_handler) + return logger + + +def run_workflow_from_cellfinder_run(config: CellfinderConfig): + """ + Run workflow based on the cellfinder_core.main.main() + function. + + The steps are: + 1. Read the input signal and background data as two separate + Dask arrays. + 2. Run the main cellfinder pipeline on the input Dask arrays, + with the parameters defined in the input configuration (config). + 3. Save the detected cells as an xml file to the location specified in + the input configuration (config). 
+ + Parameters + ---------- + config : CellfinderConfig + a class with the required setup methods and parameters for + the cellfinder workflow + """ + # Read input data as Dask arrays + signal_array = read_with_dask(config.signal_dir_path) + background_array = read_with_dask(config.background_dir_path) + + # Run main analysis using `cellfinder_run` + detected_cells = cellfinder_run( + signal_array, background_array, config.voxel_sizes + ) + + # Save results to xml file + save_cells( + detected_cells, + config.detected_cells_path, + ) + + +def setup_workflow(input_config_path: Path) -> CellfinderConfig: + """Run setup steps prior to executing the workflow + + These setup steps include: + - instantiating a CellfinderConfig object with the required parameters, + - checking if the input data exists locally, and fetching from + GIN repository otherwise, + - adding the path to the input data files to the config, and + - creating a timestamped directory for the output of the workflow if + it doesn't exist and adding its path to the config + + Parameters + ---------- + input_config_path : Path + path to the input config file + + Returns + ------- + config : CellfinderConfig + a dataclass whose attributes are the parameters + for running cellfinder. 
+ """ + + # Check config file exists + assert input_config_path.exists() + + # Instantiate a CellfinderConfig from the input json file + # (assumes config is json serializable) + with open(input_config_path) as c: + config_dict = json.load(c) + config = CellfinderConfig(**config_dict) + + # Print info logs for status + logger.info(f"Input config read from {input_config_path}") + if input_config_path == DEFAULT_JSON_CONFIG_PATH: + logger.info("Using default config file") + + # Retrieve and add lists of input data to the config, + # if these are not defined yet + if not (config.list_signal_files and config.list_background_files): + # build fullpaths to inputs + config.signal_dir_path = str( + Path(config.install_path) + / config.extract_dir_relative + / config.signal_subdir + ) + config.background_dir_path = str( + Path(config.install_path) + / config.extract_dir_relative + / config.background_subdir + ) + # retrieve data + config = retrieve_input_data(config) + + # Create timestamped output directory if it doesn't exist + timestamp = datetime.datetime.now() + timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S") + output_path_timestamped = Path(config.install_path) / ( + str(config.output_path_basename_relative) + timestamp_formatted + ) + output_path_timestamped.mkdir(parents=True, exist_ok=True) + + # Add output path and output file path to config + config.output_path = output_path_timestamped + config.detected_cells_path = ( + config.output_path / config.detected_cells_filename + ) + + return config + + +def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig: + """ + Adds the lists of input data files (signal and background) to the config. + + It first checks if the input data exists locally. + - If both directories (signal and background) exist, the lists of signal + and background files are added to the config. + - If exactly one of the input data directories is missing, an error + message is logged. 
+ - If neither of them exist, the data is retrieved from the provided GIN + repository. If no URL or hash to GIN is provided, an error is shown. + + Parameters + ---------- + config : CellfinderConfig + a dataclass whose attributes are the parameters + for running cellfinder. + + Returns + ------- + config : CellfinderConfig + a dataclass whose attributes are the parameters + for running cellfinder. + """ + # Check if input data (signal and background) exist locally. + # If both directories exist, get list of signal and background files + if ( + Path(config.signal_dir_path).exists() + and Path(config.background_dir_path).exists() + ): + logger.info("Fetching input data from the local directories") + + config.list_signal_files = [ + f + for f in Path(config.signal_dir_path).resolve().iterdir() + if f.is_file() + ] + config.list_background_files = [ + f + for f in Path(config.background_dir_path).resolve().iterdir() + if f.is_file() + ] + + # If exactly one of the input data directories is missing, print error + elif ( + Path(config.signal_dir_path).resolve().exists() + or Path(config.background_dir_path).resolve().exists() + ): + if not Path(config.signal_dir_path).resolve().exists(): + logger.error( + f"The directory {config.signal_dir_path} does not exist" + ) + else: + logger.error( + f"The directory {config.background_dir_path} does not exist" + ) + + # If neither of them exist, retrieve data from GIN repository + else: + # check if GIN URL and hash are defined (log error otherwise) + if (not config.data_url) or (not config.data_hash): + logger.error( + "Input data not found locally, and URL/hash to " + "GIN repository not provided" + ) + + else: + # get list of files in GIN archive with pooch.retrieve + list_files_archive = pooch.retrieve( + url=config.data_url, + known_hash=config.data_hash, + path=config.install_path, # zip will be downloaded here + progressbar=True, + processor=pooch.Unzip( + extract_dir=config.extract_dir_relative + # path to unzipped dir, 
+ # *relative* to the path set in 'path' + ), + ) + logger.info("Fetching input data from the provided GIN repository") + + # Check signal and background parent directories exist now + assert Path(config.signal_dir_path).resolve().exists() + assert Path(config.background_dir_path).resolve().exists() + + # Add signal files to config + config.list_signal_files = [ + f + for f in list_files_archive + if f.startswith( + str(Path(config.signal_dir_path).resolve()) + ) # if str(config.signal_dir_path) in f + ] + + # Add background files to config + config.list_background_files = [ + f + for f in list_files_archive + if f.startswith( + str(Path(config.background_dir_path).resolve()) + ) # if str(config.background_dir_path) in f + ] + + return config + + +def parse_cli_arguments() -> argparse.Namespace: + """Define argument parser for cellfinder + workflow script. + + It expects a path to a json file with the + parameters required to run the workflow. + If none is provided, the default config file is used. + + Returns + ------- + args : argparse.Namespace + command line input arguments parsed + """ + # initialise argument parser + parser = argparse.ArgumentParser( + description=( + "To launch the workflow with " + "a desired set of input parameters, run:" + " `python cellfinder_main.py --config path/to/input/config.json` " + "where path/to/input/config.json is the json file " + "containing the workflow parameters." 
+ ) + ) + # add arguments + parser.add_argument( + "-c", + "--config", + default=str(DEFAULT_JSON_CONFIG_PATH), + type=str, + metavar="CONFIG", # a name for usage messages + help="", + ) + + # build parser object + args = parser.parse_args() + + # print error if required arguments not provided + if not args.config: + logger.error("Paths to input config not provided.") + parser.print_help() + + return args + + +if __name__ == "__main__": + # setup logger + logger = setup_logger() + + # parse command line arguments + args = parse_cli_arguments() + + # run workflow + config = setup_workflow(Path(args.config)) + run_workflow_from_cellfinder_run(config) # only this will be benchmarked diff --git a/brainglobe_workflows/cellfinder/default_config.json b/brainglobe_workflows/cellfinder/default_config.json new file mode 100644 index 00000000..a80a4ba4 --- /dev/null +++ b/brainglobe_workflows/cellfinder/default_config.json @@ -0,0 +1,39 @@ +{ + "install_path": ".cellfinder_workflows", + "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", + "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", + "extract_dir_relative": "cellfinder_test_data", + "signal_subdir": "signal", + "background_subdir": "background", + "output_path_basename_relative": "cellfinder_output_", + "detected_cells_filename": "detected_cells.xml", + "voxel_sizes": [ + 5, + 2, + 2 + ], + "start_plane": 0, + "end_plane": -1, + "trained_model": null, + "model_weights": null, + "model": "resnet50_tv", + "batch_size": 32, + "n_free_cpus": 2, + "network_voxel_sizes": [ + 5, + 1, + 1 + ], + "soma_diameter": 16, + "ball_xy_size": 6, + "ball_z_size": 15, + "ball_overlap_fraction": 0.6, + "log_sigma_size": 0.2, + "n_sds_above_mean_thresh": 10, + "soma_spread_factor": 1.4, + "max_cluster_size": 100000, + "cube_width": 50, + "cube_height": 50, + "cube_depth": 20, + "network_depth": "50" +} diff --git a/pyproject.toml b/pyproject.toml index 
ec5c3257..c78a6bb6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,10 @@ description = "A place to keep scripts to use as benchmarks, user-examples end-t readme = "README.md" requires-python = ">=3.8.0" dynamic = ["version"] - +dependencies = [ + "pooch", + "cellfinder-core" +] license = {text = "BSD-3-Clause"} classifiers = [ @@ -51,11 +54,11 @@ build-backend = "setuptools.build_meta" include-package-data = true [tool.setuptools.packages.find] -include = ["brainglobe_scripts*"] +include = ["brainglobe_workflows*"] exclude = ["tests*"] [tool.pytest.ini_options] -addopts = "--cov=brainglobe_scripts" +addopts = "--cov=brainglobe_workflows" [tool.black] target-version = ['py38', 'py39', 'py310'] @@ -101,5 +104,5 @@ python = extras = dev commands = - pytest -v --color=yes --cov=brainglobe_scripts --cov-report=xml + pytest -v --color=yes --cov=brainglobe_workflows --cov-report=xml """ diff --git a/tests/test_integration/conftest.py b/tests/test_integration/conftest.py new file mode 100644 index 00000000..d9207917 --- /dev/null +++ b/tests/test_integration/conftest.py @@ -0,0 +1,290 @@ +import json +from pathlib import Path +from typing import Any + +import pooch +import pytest + +from brainglobe_workflows.cellfinder.cellfinder_main import CellfinderConfig + + +def make_config_dict_fetch_from_local(cellfinder_cache_dir: Path) -> dict: + """Generate a config dictionary with the required parameters + for the workflow + + The input data is assumed to be locally at cellfinder_cache_dir. 
+ The results are saved in a timestamped output subdirectory under + cellfinder_cache_dir + + Parameters + ---------- + cellfinder_cache_dir : Path + Path to the directory where the downloaded input data will be unzipped, + and the output will be saved + + Returns + ------- + dict + dictionary with the required parameters for the workflow + """ + return { + "install_path": cellfinder_cache_dir, + "extract_dir_relative": "cellfinder_test_data", # relative path + "signal_subdir": "signal", + "background_subdir": "background", + "output_path_basename_relative": "cellfinder_output_", + "detected_cells_filename": "detected_cells.xml", + "voxel_sizes": [5, 2, 2], # microns + "start_plane": 0, + "end_plane": -1, + "trained_model": None, # if None, it will use a default model + "model_weights": None, + "model": "resnet50_tv", + "batch_size": 32, + "n_free_cpus": 2, + "network_voxel_sizes": [5, 1, 1], + "soma_diameter": 16, + "ball_xy_size": 6, + "ball_z_size": 15, + "ball_overlap_fraction": 0.6, + "log_sigma_size": 0.2, + "n_sds_above_mean_thresh": 10, + "soma_spread_factor": 1.4, + "max_cluster_size": 100000, + "cube_width": 50, + "cube_height": 50, + "cube_depth": 20, + "network_depth": "50", + } + + +def make_config_dict_fetch_from_GIN( + cellfinder_cache_dir: Path, + data_url: str, + data_hash: str, +) -> dict: + """Generate a config dictionary with the required parameters + for the workflow + + The input data is fetched from GIN and downloaded to cellfinder_cache_dir. 
+ The results are also saved in a timestamped output subdirectory under + cellfinder_cache_dir + + Parameters + ---------- + cellfinder_cache_dir : Path + Path to the directory where the downloaded input data will be unzipped, + and the output will be saved + data_url: str + URL to the GIN repository with the data to download + data_hash: str + Hash of the data to download + + Returns + ------- + dict + dictionary with the required parameters for the workflow + """ + + config = make_config_dict_fetch_from_local(cellfinder_cache_dir) + config["data_url"] = data_url + config["data_hash"] = data_hash + + return config + + +def prep_json(obj: Any) -> Any: + """ + Returns a JSON encodable version of the input object. + + It uses the JSON default encoder for all objects + except those of type `Path`. + + + Parameters + ---------- + obj : Any + object to convert to a JSON encodable form + + Returns + ------- + Any + JSON serializable version of input object + """ + if isinstance(obj, Path): + return str(obj) + else: + json_decoder = json.JSONEncoder() + return json_decoder.default(obj) + + +@pytest.fixture(autouse=True) +def cellfinder_cache_dir(tmp_path: Path) -> Path: + """Create a .cellfinder_workflows directory + under a temporary pytest directory and return + its path. + + The temporary directory is available via pytest's tmp_path + fixture. 
A new temporary directory is created every function call + (i.e., scope="function") + + Parameters + ---------- + tmp_path : Path + path to pytest-generated temporary directory + + Returns + ------- + Path + path to the created cellfinder_workflows cache directory + """ + + return Path(tmp_path) / ".cellfinder_workflows" + + +@pytest.fixture(scope="session") +def data_url() -> str: + """Return the URL to the GIN repository with the input data + + Returns + ------- + str + URL to the GIN repository with the input data + """ + return "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip" + + +@pytest.fixture(scope="session") +def data_hash() -> str: + """Return the hash of the GIN input data + + Returns + ------- + str + Hash to the GIN input data + """ + return "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914" + + +@pytest.fixture(scope="session") +def default_json_config_path() -> Path: + """Return the path to the json file + with the default config parameters + + Returns + ------- + Path + path to the json file with the default config parameters + """ + from brainglobe_workflows.cellfinder.cellfinder_main import ( + DEFAULT_JSON_CONFIG_PATH, + ) + + return DEFAULT_JSON_CONFIG_PATH + + +@pytest.fixture() +def path_to_config_fetch_GIN( + tmp_path: Path, cellfinder_cache_dir: Path, data_url: str, data_hash: str +) -> Path: + """Create an input config that fetches data from GIN and + return its path + + Parameters + ---------- + tmp_path : Path + path to a fresh pytest-generated temporary directory. The + generated config is saved here. + + cellfinder_cache_dir : Path + path to the cellfinder cache directory, where the paths + in the config should point to. 
+ + data_url: str + URL to the GIN repository with the input data + + data_hash: str + hash to the GIN input data + + Returns + ------- + input_config_path : Path + path to config file that fetches data from GIN + """ + # create config dict + config_dict = make_config_dict_fetch_from_GIN( + cellfinder_cache_dir, data_url, data_hash + ) + + # create a temp json file to dump config data + input_config_path = ( + tmp_path / "input_config.json" + ) # save it in a temp dir separate from cellfinder_cache_dir + + # save config data to json file + with open(input_config_path, "w") as js: + json.dump(config_dict, js, default=prep_json) + + # check json file exists + assert Path(input_config_path).is_file() + + return input_config_path + + +@pytest.fixture() +def path_to_config_fetch_local( + tmp_path: Path, cellfinder_cache_dir: Path, data_url: str, data_hash: str +) -> Path: + """Create an input config that points to local data and + return its path. + + The local data is downloaded from GIN, but no reference + to the GIN repository is included in the config. + + Parameters + ---------- + tmp_path : Path + path to a fresh pytest-generated temporary directory. The + generated config is saved here. + + cellfinder_cache_dir : Path + path to the cellfinder cache directory, where the paths + in the config should point to. 
+ + data_url: str + URL to the GIN repository with the input data + + data_hash: str + hash to the GIN input data + + Returns + ------- + input_config_path : Path + path to a config file that points to local data + """ + + # instantiate basic config (assumes data is local) + config_dict = make_config_dict_fetch_from_local(cellfinder_cache_dir) + config = CellfinderConfig(**config_dict) + + # download GIN data to specified local directory + pooch.retrieve( + url=data_url, + known_hash=data_hash, + path=config.install_path, # path to download zip to + progressbar=True, + processor=pooch.Unzip( + extract_dir=config.extract_dir_relative + # path to unzipped dir, *relative* to 'path' + ), + ) + + # save config to json + input_config_path = tmp_path / "input_config.json" + with open(input_config_path, "w") as js: + json.dump(config_dict, js, default=prep_json) + + # check json file exists + assert Path(input_config_path).is_file() + + return input_config_path diff --git a/tests/test_integration/test_cellfinder_workflow.py b/tests/test_integration/test_cellfinder_workflow.py new file mode 100644 index 00000000..e55d0a46 --- /dev/null +++ b/tests/test_integration/test_cellfinder_workflow.py @@ -0,0 +1,211 @@ +import json +import subprocess +import sys +from pathlib import Path + +from brainglobe_workflows.cellfinder.cellfinder_main import CellfinderConfig + + +def test_run_with_default_config(tmp_path, default_json_config_path): + """Test workflow run with no command line arguments + + If no command line arguments are provided, the default + config at brainglobe_workflows/cellfinder/default_config.json + should be used. + + After the workflow is run we check that: + - there are no errors (via returncode), + - the logs reflect the default config file was used, and + - a single output directory exists with the expected + output file inside it + + Parameters + ---------- + tmp_path : Path + path to a pytest-generated temporary directory. 
+ """ + + # run workflow with no CLI arguments, + # with cwd=tmp_path + subprocess_output = subprocess.run( + [ + sys.executable, + Path(__file__).resolve().parents[2] + / "brainglobe_workflows" + / "cellfinder" + / "cellfinder_main.py", + ], + cwd=tmp_path, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + encoding="utf-8", + ) + + # check returncode + assert subprocess_output.returncode == 0 + + # check logs + assert "Using default config file" in subprocess_output.stdout + + # Check one output directory exists and has expected + # output file inside it + assert_outputs(default_json_config_path, tmp_path) + + +def test_run_with_GIN_data( + path_to_config_fetch_GIN, +): + """Test workflow runs when passing a config that fetches data + from the GIN repository + + After the workflow is run we check that: + - there are no errors (via returncode), + - the logs reflect the input config file was used, + - the logs reflect the data was downloaded from GIN, and + - a single output directory exists with the expected + output file inside it + + Parameters + ---------- + path_to_config_fetch_GIN : Path + path to the input config file that fetches data from GIN. 
+ """ + # run workflow with CLI and capture log + subprocess_output = subprocess.run( + [ + sys.executable, + Path(__file__).resolve().parents[2] + / "brainglobe_workflows" + / "cellfinder" + / "cellfinder_main.py", + "--config", + str(path_to_config_fetch_GIN), + ], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + encoding="utf-8", + ) + + # check returncode + assert subprocess_output.returncode == 0 + + # check logs + assert ( + f"Input config read from {str(path_to_config_fetch_GIN)}" + in subprocess_output.stdout + ) + assert ( + "Fetching input data from the provided GIN repository" + in subprocess_output.stdout + ) + + # check one output directory exists and + # has expected output file inside it + assert_outputs(path_to_config_fetch_GIN) + + +def test_run_with_local_data( + path_to_config_fetch_local, +): + """Test workflow runs when passing a config that uses + local data + + After the workflow is run we check that: + - there are no errors (via returncode), + - the logs reflect the input config file was used, + - the logs reflect the data was found locally, and + - a single output directory exists with the expected + output file inside it + + Parameters + ---------- + path_to_config_fetch_local : Path + path to the input config file that points to local data. 
+ """ + + # run workflow with CLI + subprocess_output = subprocess.run( + [ + sys.executable, + Path(__file__).resolve().parents[2] + / "brainglobe_workflows" + / "cellfinder" + / "cellfinder_main.py", + "--config", + str(path_to_config_fetch_local), + ], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + encoding="utf-8", + ) + + # check returncode + assert subprocess_output.returncode == 0 + + # check logs + assert ( + f"Input config read from {str(path_to_config_fetch_local)}" + in subprocess_output.stdout + ) + assert ( + "Fetching input data from the local directories" + in subprocess_output.stdout + ) + + # check one output directory exists and + # has expected output file inside it + assert_outputs(path_to_config_fetch_local) + + +def assert_outputs(path_to_config, parent_dir_of_install_path=""): + """Helper function to determine whether the output is + as expected. + + It checks that: + - a single output directory exists, and + - the expected output file exists inside it + + Note that config.output_path is only defined after the workflow + setup is run, because its name is timestamped. Therefore, we + search for a directory based on config.output_path_basename_relative. + + Parameters + ---------- + path_to_config : Path + path to the input config used to generate the + output. + + parent_dir_of_install_path : str, optional + If the install_path in the input config is relative to the + directory the script is launched from (as is the case in the + default_config.json file), the absolute path to its parent_dir + must be specified here. If the path to install_path is + absolute, this input is not required. By default "". 
+ """ + + # load input config + with open(path_to_config) as config: + config_dict = json.load(config) + config = CellfinderConfig(**config_dict) + + # check one output directory exists and + # it has expected output file inside it + output_path_without_timestamp = ( + Path(parent_dir_of_install_path) + / config.install_path + / config.output_path_basename_relative + ) + output_path_timestamped = [ + x + for x in output_path_without_timestamp.parent.glob("*") + if x.is_dir() and x.name.startswith(output_path_without_timestamp.name) + ] + + assert len(output_path_timestamped) == 1 + assert (output_path_timestamped[0]).exists() + assert ( + output_path_timestamped[0] / config.detected_cells_filename + ).is_file()