From 017f005333d3c234eda15a0eed71ef8e02e2499a Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 10 Jan 2024 18:31:56 +0000 Subject: [PATCH] Cellfinder config + path change (#64) * pass all parameters to cellfinder_run * changed required, optional and internal fields. read_config passes. add method to config class (WIP) * add signal and background data from local * add missing data config fixtures * mark GIN download data test as slow * refactor config class methods and remove setup_workflow * tests pass with default config * remove unused fixtures * update entry point * change cellfinder-core to cellfinder in pyproject * update cellfinder to cellfinder_core * renaming cellfinder script workflow to cellfinder_core. fix entrypoint test. * docstrings for class config * fix entry point * skip add input paths test * feedback from code review --- benchmarks/cellfinder_core.py | 26 +- .../cellfinder_core/cellfinder.py | 420 ---------------- .../cellfinder_core/cellfinder_core.py | 463 ++++++++++++++++++ brainglobe_workflows/configs/cellfinder.json | 6 - pyproject.toml | 13 +- tests/cellfinder_core/conftest.py | 97 +--- .../test_integration/test_cellfinder.py | 89 +--- .../test_unit/test_cellfinder.py | 295 +++++------ tests/data/input_data_GIN.json | 39 -- tests/data/input_data_locally.json | 37 -- tests/data/input_data_missing_background.json | 37 -- tests/data/input_data_missing_signal.json | 37 -- tests/data/input_data_not_locally_or_GIN.json | 37 -- 13 files changed, 623 insertions(+), 973 deletions(-) delete mode 100644 brainglobe_workflows/cellfinder_core/cellfinder.py create mode 100644 brainglobe_workflows/cellfinder_core/cellfinder_core.py delete mode 100644 tests/data/input_data_GIN.json delete mode 100644 tests/data/input_data_locally.json delete mode 100644 tests/data/input_data_missing_background.json delete mode 100644 tests/data/input_data_missing_signal.json delete mode 100644 
tests/data/input_data_not_locally_or_GIN.json diff --git a/benchmarks/cellfinder_core.py b/benchmarks/cellfinder_core.py index 453db210..41821d94 100644 --- a/benchmarks/cellfinder_core.py +++ b/benchmarks/cellfinder_core.py @@ -7,11 +7,11 @@ from cellfinder.core.main import main as cellfinder_run from cellfinder.core.tools.IO import read_with_dask -from brainglobe_workflows.cellfinder_core.cellfinder import ( +from brainglobe_workflows.cellfinder_core.cellfinder_core import ( CellfinderConfig, run_workflow_from_cellfinder_run, ) -from brainglobe_workflows.cellfinder_core.cellfinder import ( +from brainglobe_workflows.cellfinder_core.cellfinder_core import ( setup as setup_cellfinder_workflow, ) from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER @@ -112,14 +112,14 @@ def setup_cache( _ = pooch.retrieve( url=config.data_url, known_hash=config.data_hash, - path=config.install_path, + path=config._install_path, progressbar=True, processor=pooch.Unzip(extract_dir=config.data_dir_relative), ) # Check paths to input data should now exist in config - assert Path(config.signal_dir_path).exists() - assert Path(config.background_dir_path).exists() + assert Path(config._signal_dir_path).exists() + assert Path(config._background_dir_path).exists() def setup(self): """ @@ -146,7 +146,7 @@ def teardown(self): The input data is kept for all repeats of the same benchmark, to avoid repeated downloads from GIN. 
""" - shutil.rmtree(Path(self.cfg.output_path).resolve()) + shutil.rmtree(Path(self.cfg._output_path).resolve()) class TimeFullWorkflow(TimeBenchmarkPrepGIN): @@ -177,10 +177,10 @@ class TimeReadInputDask(TimeBenchmarkPrepGIN): """ def time_read_signal_with_dask(self): - read_with_dask(self.cfg.signal_dir_path) + read_with_dask(self.cfg._signal_dir_path) def time_read_background_with_dask(self): - read_with_dask(self.cfg.background_dir_path) + read_with_dask(self.cfg._background_dir_path) class TimeDetectCells(TimeBenchmarkPrepGIN): @@ -199,8 +199,8 @@ def setup(self): TimeBenchmarkPrepGIN.setup(self) # add input data as arrays to config - self.signal_array = read_with_dask(self.cfg.signal_dir_path) - self.background_array = read_with_dask(self.cfg.background_dir_path) + self.signal_array = read_with_dask(self.cfg._signal_dir_path) + self.background_array = read_with_dask(self.cfg._background_dir_path) def time_cellfinder_run(self): cellfinder_run( @@ -215,8 +215,8 @@ def setup(self): TimeBenchmarkPrepGIN.setup(self) # add input data as arrays to config - self.signal_array = read_with_dask(self.cfg.signal_dir_path) - self.background_array = read_with_dask(self.cfg.background_dir_path) + self.signal_array = read_with_dask(self.cfg._signal_dir_path) + self.background_array = read_with_dask(self.cfg._background_dir_path) # detect cells self.detected_cells = cellfinder_run( @@ -224,4 +224,4 @@ def setup(self): ) def time_save_cells(self): - save_cells(self.detected_cells, self.cfg.detected_cells_path) + save_cells(self.detected_cells, self.cfg._detected_cells_path) diff --git a/brainglobe_workflows/cellfinder_core/cellfinder.py b/brainglobe_workflows/cellfinder_core/cellfinder.py deleted file mode 100644 index ec6dfa60..00000000 --- a/brainglobe_workflows/cellfinder_core/cellfinder.py +++ /dev/null @@ -1,420 +0,0 @@ -"""This script reproduces the most common cellfinder workflow - -It receives as an (optional) command line input the path to a configuration -json file, 
that holds the values of the required parameters for the workflow. - -If no input json file is passed as a configuration, the default -configuration defined at brainglobe_workflows/cellfinder/default_config.json -is used. - -Example usage: - - to pass a custom configuration, run (from the cellfinder_main.py - parent directory): - python cellfinder_main.py --config path/to/input/config.json - - to use the default configuration, run - python cellfinder_main.py - - -""" - - -import datetime -import json -import logging -import os -import sys -from dataclasses import dataclass -from pathlib import Path -from typing import Optional, Tuple, Union - -import pooch -from brainglobe_utils.IO.cells import save_cells -from cellfinder_core.main import main as cellfinder_run -from cellfinder_core.tools.IO import read_with_dask -from cellfinder_core.train.train_yml import depth_type - -from brainglobe_workflows.utils import ( - DEFAULT_JSON_CONFIG_PATH_CELLFINDER, - config_parser, - setup_logger, -) -from brainglobe_workflows.utils import __name__ as LOGGER_NAME - -Pathlike = Union[str, os.PathLike] - - -@dataclass -class CellfinderConfig: - """ - Define input and output data locations, and the parameters for - the cellfinder preprocessing steps. 
- """ - - # input data - # data_dir_relative: parent directory to signal and background, - # relative to install path - data_dir_relative: Pathlike - signal_subdir: str - background_subdir: str - - # output - output_path_basename_relative: Pathlike - detected_cells_filename: Pathlike - - # preprocessing parameters - voxel_sizes: Tuple[float, float, float] - start_plane: int - end_plane: int - trained_model: Optional[ - os.PathLike - ] # if None, it will use a default model - model_weights: Optional[os.PathLike] - model: str - batch_size: int - n_free_cpus: int - network_voxel_sizes: Tuple[int, int, int] - soma_diameter: int - ball_xy_size: int - ball_z_size: int - ball_overlap_fraction: float - log_sigma_size: float - n_sds_above_mean_thresh: int - soma_spread_factor: float - max_cluster_size: int - cube_width: int - cube_height: int - cube_depth: int - network_depth: depth_type - - # install path (root for all inputs and outputs) - install_path: Pathlike = ".cellfinder_workflows" - - # origin of data to download (if required) - data_url: Optional[str] = None - data_hash: Optional[str] = None - - # The following attributes are added - # during the setup phase of the workflow - list_signal_files: Optional[list] = None - list_background_files: Optional[list] = None - output_path: Pathlike = "" - detected_cells_path: Pathlike = "" - signal_dir_path: Pathlike = "" - background_dir_path: Pathlike = "" - - -def read_cellfinder_config(input_config_path: Path): - """Instantiate a CellfinderConfig from the input json file - (assumes config is json serializable) - - - Parameters - ---------- - input_config_path : Path - Absolute path to a cellfinder config file - - Returns - ------- - CellfinderConfig: - The cellfinder config object, populated with data from the input - """ - # read input config - with open(input_config_path) as cfg: - config_dict = json.load(cfg) - config = CellfinderConfig(**config_dict) - - return config - - -def add_signal_and_background_files( - config: 
CellfinderConfig, -) -> CellfinderConfig: - """ - Adds the lists of input data files (signal and background) - to the config. - - These files are first searched locally. If not found, we - attempt to download them from GIN. - - Specifically: - - If both parent data directories (signal and background) exist locally, - the lists of signal and background files are added to the config. - - If exactly one of the parent data directories is missing, an error - message is logged. - - If neither of them exist, the data is retrieved from the provided GIN - repository. If no URL or hash to GIN is provided, an error is thrown. - - Parameters - ---------- - config : CellfinderConfig - a cellfinder config with input data files to be validated - - Returns - ------- - config : CellfinderConfig - a cellfinder config with updated input data lists. - """ - # Fetch logger - logger = logging.getLogger(LOGGER_NAME) - - # Check if input data directories (signal and background) exist locally. - # If both directories exist, get list of signal and background files - if ( - Path(config.signal_dir_path).exists() - and Path(config.background_dir_path).exists() - ): - logger.info("Fetching input data from the local directories") - - config.list_signal_files = [ - f - for f in Path(config.signal_dir_path).resolve().iterdir() - if f.is_file() - ] - config.list_background_files = [ - f - for f in Path(config.background_dir_path).resolve().iterdir() - if f.is_file() - ] - - # If exactly one of the input data directories is missing, print error - elif ( - Path(config.signal_dir_path).resolve().exists() - or Path(config.background_dir_path).resolve().exists() - ): - if not Path(config.signal_dir_path).resolve().exists(): - logger.error( - f"The directory {config.signal_dir_path} does not exist" - ) - else: - logger.error( - f"The directory {config.background_dir_path} " "does not exist" - ) - - # If neither of the input data directories exist, - # retrieve data from GIN repository and add list of 
files to config - else: - # Check if GIN URL and hash are defined (log error otherwise) - if config.data_url and config.data_hash: - # get list of files in GIN archive with pooch.retrieve - list_files_archive = pooch.retrieve( - url=config.data_url, - known_hash=config.data_hash, - path=config.install_path, # zip will be downloaded here - progressbar=True, - processor=pooch.Unzip( - extract_dir=config.data_dir_relative - # path to unzipped dir, - # *relative* to the path set in 'path' - ), - ) - logger.info("Fetching input data from the provided GIN repository") - - # Check signal and background parent directories exist now - assert Path(config.signal_dir_path).resolve().exists() - assert Path(config.background_dir_path).resolve().exists() - - # Add signal files to config - config.list_signal_files = [ - f - for f in list_files_archive - if f.startswith( - str(Path(config.signal_dir_path).resolve()) - ) # if str(config.signal_dir_path) in f - ] - - # Add background files to config - config.list_background_files = [ - f - for f in list_files_archive - if f.startswith( - str(Path(config.background_dir_path).resolve()) - ) - ] - # If one of URL/hash to GIN repo not defined, throw an error - else: - logger.error( - "Input data not found locally, and URL/hash to " - "GIN repository not provided" - ) - - return config - - -def setup_workflow(input_config_path: Path) -> CellfinderConfig: - """Run setup steps prior to executing the workflow - - These setup steps include: - - instantiating a CellfinderConfig object with the required parameters, - - checking if the input data exists locally, and fetching from - GIN repository otherwise, - - adding the path to the input data files to the config, and - - creating a timestamped directory for the output of the workflow if - it doesn't exist and adding its path to the config - - Parameters - ---------- - input_config_path : Path - path to the input config file - - Returns - ------- - config : CellfinderConfig - a dataclass whose 
attributes are the parameters - for running cellfinder. - """ - - # Fetch logger - logger = logging.getLogger(LOGGER_NAME) - - # Check config file exists - assert input_config_path.exists() - - # Instantiate a CellfinderConfig from the input json file - # (assumes config is json serializable) - config = read_cellfinder_config(input_config_path) - - # Print info logs for status - logger.info(f"Input config read from {input_config_path}") - if input_config_path == DEFAULT_JSON_CONFIG_PATH_CELLFINDER: - logger.info("Using default config file") - - # Add lists of input data files to the config, - # if these are not defined yet - if not (config.list_signal_files and config.list_background_files): - # build fullpaths to input directories - config.signal_dir_path = str( - Path(config.install_path) - / config.data_dir_relative - / config.signal_subdir - ) - config.background_dir_path = str( - Path(config.install_path) - / config.data_dir_relative - / config.background_subdir - ) - - # add signal and background files to config - config = add_signal_and_background_files(config) - - # Create timestamped output directory if it doesn't exist - timestamp = datetime.datetime.now() - timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S") - output_path_timestamped = Path(config.install_path) / ( - str(config.output_path_basename_relative) + timestamp_formatted - ) - output_path_timestamped.mkdir( - parents=True, # create any missing parents - exist_ok=True, # ignore FileExistsError exceptions - ) - - # Add output path and output file path to config - config.output_path = output_path_timestamped - config.detected_cells_path = ( - config.output_path / config.detected_cells_filename - ) - - return config - - -def setup(input_config_path: str) -> CellfinderConfig: - # setup logger - _ = setup_logger() - - # run setup steps and return config - cfg = setup_workflow(Path(input_config_path)) - - return cfg - - -def run_workflow_from_cellfinder_run(cfg: CellfinderConfig): - """ - Run 
workflow based on the cellfinder_core.main.main() - function. - - The steps are: - 1. Read the input signal and background data as two separate - Dask arrays. - 2. Run the main cellfinder pipeline on the input Dask arrays, - with the parameters defined in the input configuration (cfg). - 3. Save the detected cells as an xml file to the location specified in - the input configuration (cfg). - - Parameters - ---------- - cfg : CellfinderConfig - a class with the required setup methods and parameters for - the cellfinder workflow - """ - # Read input data as Dask arrays - signal_array = read_with_dask(cfg.signal_dir_path) - background_array = read_with_dask(cfg.background_dir_path) - - # Run main analysis using `cellfinder_run` - detected_cells = cellfinder_run( - signal_array, background_array, cfg.voxel_sizes - ) - - # Save results to xml file - save_cells( - detected_cells, - cfg.detected_cells_path, - ) - - -def main( - input_config: str = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER), -) -> CellfinderConfig: - """ - Setup and run cellfinder workflow. - - This function runs the setup steps required - to run the cellfinder workflow, and the - workflow itself. Note that only the workflow - will be benchmarked. - - Parameters - ---------- - input_config : str, optional - Absolute path to input config file, - by default str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) - - Returns - ------- - cfg : CellfinderConfig - a class with the required setup methods and parameters for - the cellfinder workflow - """ - # run setup - cfg = setup(input_config) - - # run workflow - run_workflow_from_cellfinder_run(cfg) # only this will be benchmarked - - return cfg - - -def main_app_wrapper(): - """ - Parse command line arguments and - run cellfinder setup and workflow - - This function is used to define an entry-point, - that allows the user to run the cellfinder workflow - for a given input config file as: - `cellfinder-workflow --config `. 
- - If no input config file is provided, the default is used. - - """ - # parse CLI arguments - args = config_parser( - sys.argv[1:], # sys.argv[0] is the script name - str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER), - ) - - # run setup and workflow - _ = main(args.config) - - -if __name__ == "__main__": - main_app_wrapper() diff --git a/brainglobe_workflows/cellfinder_core/cellfinder_core.py b/brainglobe_workflows/cellfinder_core/cellfinder_core.py new file mode 100644 index 00000000..d16e958c --- /dev/null +++ b/brainglobe_workflows/cellfinder_core/cellfinder_core.py @@ -0,0 +1,463 @@ +"""This script reproduces the most common cellfinder workflow + +It receives as an (optional) command line input the path to a configuration +json file, that holds the values of the required parameters for the workflow. + +If no input json file is passed as a configuration, the default +configuration defined at brainglobe_workflows/cellfinder/default_config.json +is used. + +Example usage: + - to pass a custom configuration, run (from the cellfinder_main.py + parent directory): + python cellfinder_main.py --config path/to/input/config.json + - to use the default configuration, run + python cellfinder_main.py + + +""" + + +import datetime +import json +import logging +import os +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Optional, Union + +import pooch +from brainglobe_utils.IO.cells import save_cells +from cellfinder.core.main import main as cellfinder_run +from cellfinder.core.tools.IO import read_with_dask +from cellfinder.core.train.train_yml import depth_type + +from brainglobe_workflows.utils import ( + DEFAULT_JSON_CONFIG_PATH_CELLFINDER, + config_parser, + setup_logger, +) +from brainglobe_workflows.utils import __name__ as LOGGER_NAME + +Pathlike = Union[str, os.PathLike] + + +@dataclass +class CellfinderConfig: + """Define the parameters for the cellfinder workflow. 
+ + There are three types of fields: + - required attributes: must be provided, they do not have a default value; + - optional attributes: they have a default value if not specified; + - internal attributes: their names start with _, indicating these are + private. Any functionality to update them should be a class method. + """ + + # Required parameters + voxel_sizes: tuple[float, float, float] + start_plane: int + end_plane: int + trained_model: Optional[os.PathLike] + model_weights: Optional[os.PathLike] + model: str + batch_size: int + n_free_cpus: int + network_voxel_sizes: tuple[int, int, int] + soma_diameter: int + ball_xy_size: int + ball_z_size: int + ball_overlap_fraction: float + log_sigma_size: float + n_sds_above_mean_thresh: int + soma_spread_factor: float + max_cluster_size: int + cube_width: int + cube_height: int + cube_depth: int + network_depth: depth_type + + # Optional parameters + + # install path: default path for downloaded and output data + _install_path: Pathlike = ( + Path.home() / ".brainglobe" / "workflows" / "cellfinder_core" + ) + + # input data paths + # Note: if not specified, the signal and background data + # are assumed to be under "signal" and "background" + # dirs under _install_path/cellfinder_test_data/ + # (see __post_init__ method) + input_data_dir: Optional[Pathlike] = None + signal_subdir: Pathlike = "signal" + background_subdir: Pathlike = "background" + + # output data paths + # Note: if output_parent_dir is not specified, + # it is assumed to be under _install_path + # (see __post_init__ method) + output_dir_basename: str = "cellfinder_output_" + detected_cells_filename: str = "detected_cells.xml" + output_parent_dir: Optional[Pathlike] = None + + # source of data to download + # if not specified in JSON, it is set to None + data_url: Optional[str] = None + data_hash: Optional[str] = None + + # Internal parameters + # even though these are optional we don't expect users to + # change them + _signal_dir_path: 
Optional[Pathlike] = None + _background_dir_path: Optional[Pathlike] = None + _list_signal_files: Optional[list] = None + _list_background_files: Optional[list] = None + _detected_cells_path: Pathlike = "" + _output_path: Pathlike = "" + + def __post_init__(self: "CellfinderConfig"): + """Executed after __init__ function. + + We use this method to define attributes of the data class + as a function of other attributes. + See https://peps.python.org/pep-0557/#post-init-processing + + The attributes added are input and output data paths + + Parameters + ---------- + self : CellfinderConfig + a CellfinderConfig instance + """ + + # Add input data paths to config + self.add_input_paths() + + # Add output paths to config + self.add_output_paths() + + def add_output_paths(self): + """Adds output paths to the config + + Specifically, it adds: + - output_parent_dir: set to a a timestamped output directory if not + set in __init__(); + - _detected_cells_path: path to the output file + + Parameters + ---------- + config : CellfinderConfig + a cellfinder config + """ + + # Fill in output directory if not specified + if self.output_parent_dir is None: + self.output_parent_dir = Path(self._install_path) + + # Add to config the path to timestamped output directory + timestamp = datetime.datetime.now() + timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S") + self._output_path = Path(self.output_parent_dir) / ( + str(self.output_dir_basename) + timestamp_formatted + ) + self._output_path.mkdir( + parents=True, # create any missing parents + exist_ok=True, # ignore FileExistsError exceptions + ) + + # Add to config the path to the output file + self._detected_cells_path = ( + self._output_path / self.detected_cells_filename + ) + + def add_input_paths(self): + """Adds input data paths to the config. 
+ + Specifically, it adds: + - input_data_dir: set to a default value if not set in __init__(); + - _signal_dir_path: full path to the directory with the signal files + - _background_dir_path: full path to the directory with the + background files. + - _list_signal_files: list of signal files + - _list_background_files: list of background files + + Parameters + ---------- + config : CellfinderConfig + a cellfinder config with input data files to be validated + + Notes + ----- + The signal and background files are first searched locally at the + given location. If not found, we attempt to download them from GIN + and place them at the specified location (input_data_dir). + + - If both parent data directories (signal and background) exist + locally, the lists of signal and background files are added to + the config. + - If exactly one of the parent data directories is missing, an error + message is logged. + - If neither of them exist, the data is retrieved from the provided GIN + repository. If no URL or hash to GIN is provided, an error is thrown. + + """ + # Fetch logger + logger = logging.getLogger(LOGGER_NAME) + + # Fill in input data directory if not specified + if self.input_data_dir is None: + self.input_data_dir = ( + Path(self._install_path) / "cellfinder_test_data" + ) + + # Fill in signal and background paths derived from 'input_data_dir' + self._signal_dir_path = self.input_data_dir / Path(self.signal_subdir) + self._background_dir_path = self.input_data_dir / Path( + self.background_subdir + ) + + # Check if input data directories (signal and background) exist + # locally. 
+ # If both directories exist, get list of signal and background files + if ( + Path(self._signal_dir_path).exists() + and Path(self._background_dir_path).exists() + ): + logger.info("Fetching input data from the local directories") + + self._list_signal_files = [ + f + for f in Path(self._signal_dir_path).resolve().iterdir() + if f.is_file() + ] + self._list_background_files = [ + f + for f in Path(self._background_dir_path).resolve().iterdir() + if f.is_file() + ] + + # If exactly one of the input data directories is missing, print error + elif ( + Path(self._signal_dir_path).resolve().exists() + or Path(self._background_dir_path).resolve().exists() + ): + if not Path(self._signal_dir_path).resolve().exists(): + logger.error( + f"The directory {self._signal_dir_path} does not exist", + ) + else: + logger.error( + f"The directory {self._background_dir_path} " + "does not exist", + ) + + # If neither of the input data directories exist, + # retrieve data from GIN repository and add list of files to config + else: + # Check if GIN URL and hash are defined (log error otherwise) + if self.data_url and self.data_hash: + # get list of files in GIN archive with pooch.retrieve + list_files_archive = pooch.retrieve( + url=self.data_url, + known_hash=self.data_hash, + path=Path( + self.input_data_dir + ).parent, # zip will be downloaded here + progressbar=True, + processor=pooch.Unzip( + extract_dir=Path(self.input_data_dir).stem, + # files are unpacked here, a dir + # *relative* to the path set in 'path' + ), + ) + logger.info( + "Fetching input data from the provided GIN repository" + ) + + # Check signal and background parent directories exist now + assert Path(self._signal_dir_path).resolve().exists() + assert Path(self._background_dir_path).resolve().exists() + + # Add signal files to config + self._list_signal_files = [ + f + for f in list_files_archive + if f.startswith( + str(Path(self._signal_dir_path).resolve()), + ) + ] + + # Add background files to config + 
self._list_background_files = [ + f + for f in list_files_archive + if f.startswith( + str(Path(self._background_dir_path).resolve()), + ) + ] + # If one of URL/hash to GIN repo not defined, throw an error + else: + logger.error( + "Input data not found locally, and URL/hash to " + "GIN repository not provided", + ) + + +def read_cellfinder_config( + input_config_path: str, log_on: bool = False +) -> CellfinderConfig: + """Instantiate a CellfinderConfig from the input json file. + + Assumes config is json serializable. + + Parameters + ---------- + input_config_path : Path + Absolute path to a cellfinder config file + log_on : bool, optional + whether to log the info messages from reading the config + to the logger, by default False + + Returns + ------- + CellfinderConfig: + The cellfinder config object, populated with data from the input + """ + + # read input config + with open(input_config_path) as cfg: + config_dict = json.load(cfg) + config = CellfinderConfig(**config_dict) + + # print config's origin to log if required + if log_on: + logger = logging.getLogger(LOGGER_NAME) + logger.info(f"Input config read from {input_config_path}") + if input_config_path == DEFAULT_JSON_CONFIG_PATH_CELLFINDER: + logger.info("Using default config file") + + return config + + +def setup(input_config_path: str) -> CellfinderConfig: + # setup logger + _ = setup_logger() + + # read config + cfg = read_cellfinder_config(input_config_path) + + return cfg + + +def run_workflow_from_cellfinder_run(cfg: CellfinderConfig): + """Run workflow based on the cellfinder.core.main.main() + function. + + The steps are: + 1. Read the input signal and background data as two separate + Dask arrays. + 2. Run the main cellfinder pipeline on the input Dask arrays, + with the parameters defined in the input configuration (cfg). + 3. Save the detected cells as an xml file to the location specified in + the input configuration (cfg). 
+ + Parameters + ---------- + cfg : CellfinderConfig + a class with the required setup methods and parameters for + the cellfinder workflow + """ + # Read input data as Dask arrays + signal_array = read_with_dask(str(cfg._signal_dir_path)) + background_array = read_with_dask(str(cfg._background_dir_path)) + + # Run main analysis using `cellfinder_run` + detected_cells = cellfinder_run( + signal_array, + background_array, + cfg.voxel_sizes, + cfg.start_plane, + cfg.end_plane, + cfg.trained_model, + cfg.model_weights, + cfg.model, + cfg.batch_size, + cfg.n_free_cpus, + cfg.network_voxel_sizes, + cfg.soma_diameter, + cfg.ball_xy_size, + cfg.ball_z_size, + cfg.ball_overlap_fraction, + cfg.log_sigma_size, + cfg.n_sds_above_mean_thresh, + cfg.soma_spread_factor, + cfg.max_cluster_size, + cfg.cube_width, + cfg.cube_height, + cfg.cube_depth, + cfg.network_depth, + ) + + # Save results to xml file + save_cells( + detected_cells, + cfg._detected_cells_path, + ) + + +def main( + input_config: str = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER), +) -> CellfinderConfig: + """Setup and run cellfinder workflow. + + This function runs the setup steps required + to run the cellfinder workflow, and the + workflow itself. Note that only the workflow + will be benchmarked. + + Parameters + ---------- + input_config : str, optional + Absolute path to input config file, + by default str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER) + + Returns + ------- + cfg : CellfinderConfig + a class with the required setup methods and parameters for + the cellfinder workflow + """ + # run setup + cfg = setup(input_config) + + # run workflow + run_workflow_from_cellfinder_run(cfg) # only this will be benchmarked + + return cfg + + +def main_app_wrapper(): + """Parse command line arguments and + run cellfinder setup and workflow + + This function is used to define an entry-point, + that allows the user to run the cellfinder workflow + for a given input config file as: + `cellfinder-workflow --config `. 
+ + If no input config file is provided, the default is used. + + """ + # parse CLI arguments + args = config_parser( + sys.argv[1:], # sys.argv[0] is the script name + str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER), + ) + + # run setup and workflow + _ = main(args.config) + + +if __name__ == "__main__": + main_app_wrapper() diff --git a/brainglobe_workflows/configs/cellfinder.json b/brainglobe_workflows/configs/cellfinder.json index daf056a5..e977271c 100644 --- a/brainglobe_workflows/configs/cellfinder.json +++ b/brainglobe_workflows/configs/cellfinder.json @@ -1,12 +1,6 @@ { - "install_path": ".cellfinder_workflows", "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", - "data_dir_relative": "cellfinder_test_data", - "signal_subdir": "signal", - "background_subdir": "background", - "output_path_basename_relative": "cellfinder_output_", - "detected_cells_filename": "detected_cells.xml", "voxel_sizes": [ 5, 2, diff --git a/pyproject.toml b/pyproject.toml index c17a4252..6ba3d3ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,8 +24,8 @@ classifiers = [ "Topic :: Scientific/Engineering :: Image Recognition", ] -# Below the dependenciess for the cellfinder CLI tool only -# (i.e., only what users will need for the CLI) +# Below the dependenciess for brainmapper (the cellfinder CLI tool) only +# (i.e., only what users will need for brainmapper) dependencies = [ "brainglobe>=1.0.0", "brainreg>=1.0.0", @@ -74,7 +74,7 @@ napari = ["napari[pyqt5]", "brainglobe-napari-io", "cellfinder[napari]>=1.0.0"] "Source Code" = "https://github.com/brainglobe/brainglobe-workflows" [project.scripts] -cellfinder-workflow = "brainglobe_workflows.cellfinder_core.cellfinder:main_app_wrapper" +cellfinder-workflow = "brainglobe_workflows.cellfinder_core.cellfinder_core:main_app_wrapper" brainmapper = "brainglobe_workflows.brainmapper.main:main" 
[build-system] @@ -88,11 +88,10 @@ zip-safe = false [tool.setuptools.packages.find] include = ["brainglobe_workflows"] exclude = [ - "brainglobe_workflows.cellfinder_core", "tests", "resources", "benchmarks", -] # it's not excluding "brainglobe_workflows.cellfinder_core"! +] [tool.black] target-version = ["py39", "py310"] @@ -143,10 +142,10 @@ INPUT_COREDEV = extras = dev deps = - coredev: git+https://github.com/brainglobe/cellfinder-core.git + coredev: git+https://github.com/brainglobe/cellfinder.git commands = pytest {toxinidir} -v --color=yes --cov=./ --cov-report=xml description = Run tests - coredev: Run tests with the development version of cellfinder-core + coredev: Run tests with the development version of cellfinder """ diff --git a/tests/cellfinder_core/conftest.py b/tests/cellfinder_core/conftest.py index 749d83d3..127a4483 100644 --- a/tests/cellfinder_core/conftest.py +++ b/tests/cellfinder_core/conftest.py @@ -2,99 +2,36 @@ from pathlib import Path -import pooch import pytest -from brainglobe_workflows.cellfinder_core.cellfinder import ( - read_cellfinder_config, -) - @pytest.fixture() -def input_configs_dir() -> Path: - """Return the directory path to the input configs - used for testing +def default_input_config_cellfinder() -> Path: + """Return path to default input config for cellfinder workflow Returns ------- Path - Test data directory path - """ - return Path(__file__).parents[1] / "data" - + Path to default input config -@pytest.fixture(scope="session") -def cellfinder_GIN_data() -> dict: - """Return the URL and hash to the GIN repository with the input data - - Returns - ------- - dict - URL and hash of the GIN repository with the cellfinder test data """ - return { - "url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", - "hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", # noqa - } + from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER + return 
DEFAULT_JSON_CONFIG_PATH_CELLFINDER -@pytest.fixture() -def input_config_fetch_GIN(input_configs_dir: Path) -> Path: - """ - Return the cellfinder config json file that is configured to fetch from GIN - Parameters - ---------- - input_configs_dir : Path - Path to the directory holding the test config files. +@pytest.fixture(autouse=True) +def mock_home_directory(monkeypatch: pytest.MonkeyPatch): + # define mock home path + home_path = Path.home() # actual home path + mock_home_path = home_path / ".brainglobe-tests" # tmp_path # - Returns - ------- - Path - Path to the config json file for fetching data from GIN - """ - return input_configs_dir / "input_data_GIN.json" - - -@pytest.fixture() -def input_config_fetch_local( - input_configs_dir: Path, - cellfinder_GIN_data: dict, -) -> Path: - """ - Download the cellfinder data locally and return the config json - file configured to fetch local data. - - The data is downloaded to a directory under the current working - directory (that is, to a directory under the directory from where - pytest is launched). - - Parameters - ---------- - input_configs_dir : Path - Path to the directory holding the test config files. 
- cellfinder_GIN_data : dict - URL and hash of the GIN repository with the cellfinder test data - - Returns - ------- - Path - Path to the config json file for fetching data locally - """ - # read local config - input_config_path = input_configs_dir / "input_data_locally.json" - config = read_cellfinder_config(input_config_path) + # create dir if it doesn't exist + if not mock_home_path.exists(): + mock_home_path.mkdir() - # fetch data from GIN and download locally - pooch.retrieve( - url=cellfinder_GIN_data["url"], - known_hash=cellfinder_GIN_data["hash"], - path=config.install_path, # path to download zip to - progressbar=True, - processor=pooch.Unzip( - extract_dir=config.data_dir_relative - # path to unzipped dir, *relative* to 'path' - ), - ) + # monkeypatch Path.home() to point to the mock home + def mock_home(): + return mock_home_path - return input_config_path + monkeypatch.setattr(Path, "home", mock_home) diff --git a/tests/cellfinder_core/test_integration/test_cellfinder.py b/tests/cellfinder_core/test_integration/test_cellfinder.py index 1bd120ba..26f22ec8 100644 --- a/tests/cellfinder_core/test_integration/test_cellfinder.py +++ b/tests/cellfinder_core/test_integration/test_cellfinder.py @@ -1,27 +1,11 @@ import subprocess import sys from pathlib import Path -from typing import Optional - -import pytest - -from brainglobe_workflows.cellfinder_core.cellfinder import main - - -@pytest.mark.parametrize( - "input_config", - [ - None, - "input_config_fetch_GIN", - "input_config_fetch_local", - ], -) -def test_main( - input_config: Optional[str], - monkeypatch: pytest.MonkeyPatch, - tmp_path: Path, - request: pytest.FixtureRequest, -): + +from brainglobe_workflows.cellfinder_core.cellfinder_core import main + + +def test_main(): """Test main function for setting up and running cellfinder workflow Parameters @@ -35,35 +19,15 @@ def test_main( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ - # monkeypatch to change 
current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - monkeypatch.chdir(tmp_path) # run main - if not input_config: - cfg = main() - else: - cfg = main(str(request.getfixturevalue(input_config))) + cfg = main() # check output files exist - assert Path(cfg.detected_cells_path).is_file() - - -@pytest.mark.parametrize( - "input_config", - [ - None, - "input_config_fetch_GIN", - "input_config_fetch_local", - ], -) -def test_script( - input_config: Optional[str], - monkeypatch: pytest.MonkeyPatch, - tmp_path: Path, - request: pytest.FixtureRequest, -): + assert Path(cfg._detected_cells_path).is_file() + + +def test_script(): """Test running the cellfinder worklfow from the command line Parameters @@ -77,26 +41,18 @@ def test_script( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ - # monkeypatch to change current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - monkeypatch.chdir(tmp_path) # define CLI input script_path = ( Path(__file__).resolve().parents[3] / "brainglobe_workflows" / "cellfinder_core" - / "cellfinder.py" + / "cellfinder_core.py" ) subprocess_input = [ sys.executable, str(script_path), ] - # append config if required - if input_config: - subprocess_input.append("--config") - subprocess_input.append(str(request.getfixturevalue(input_config))) # run workflow script from the CLI subprocess_output = subprocess.run( @@ -107,20 +63,7 @@ def test_script( assert subprocess_output.returncode == 0 -@pytest.mark.parametrize( - "input_config", - [ - None, - "input_config_fetch_GIN", - "input_config_fetch_local", - ], -) -def test_entry_point( - input_config: Optional[str], - monkeypatch: pytest.MonkeyPatch, - tmp_path: Path, - request: pytest.FixtureRequest, -): +def test_entry_point(): """Test running the cellfinder workflow via the predefined entry point Parameters @@ -134,17 +77,9 @@ def test_entry_point( request : 
pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ - # monkeypatch to change current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - monkeypatch.chdir(tmp_path) # define CLI input subprocess_input = ["cellfinder-workflow"] - # append config if required - if input_config: - subprocess_input.append("--config") - subprocess_input.append(str(request.getfixturevalue(input_config))) # run workflow with no CLI arguments, subprocess_output = subprocess.run( diff --git a/tests/cellfinder_core/test_unit/test_cellfinder.py b/tests/cellfinder_core/test_unit/test_cellfinder.py index 87f65623..d31cfb91 100644 --- a/tests/cellfinder_core/test_unit/test_cellfinder.py +++ b/tests/cellfinder_core/test_unit/test_cellfinder.py @@ -6,43 +6,74 @@ import pooch import pytest -from brainglobe_workflows.cellfinder_core.cellfinder import ( - CellfinderConfig, - add_signal_and_background_files, - read_cellfinder_config, - run_workflow_from_cellfinder_run, - setup_workflow, -) -from brainglobe_workflows.cellfinder_core.cellfinder import setup as setup_full from brainglobe_workflows.utils import setup_logger -@pytest.fixture() -def default_input_config_cellfinder() -> Path: - """Return path to default input config for cellfinder workflow +@pytest.fixture(scope="session") +def cellfinder_GIN_data() -> dict: + """Return the URL and hash to the GIN repository with the input data Returns ------- - Path - Path to default input config - + dict + URL and hash of the GIN repository with the cellfinder test data """ - from brainglobe_workflows.utils import DEFAULT_JSON_CONFIG_PATH_CELLFINDER + return { + "url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", + "hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", # noqa + } + + +@pytest.fixture() +def config_local(cellfinder_GIN_data, default_input_config_cellfinder): + """ """ + + from 
brainglobe_workflows.cellfinder_core.cellfinder_core import ( + CellfinderConfig, + ) - return DEFAULT_JSON_CONFIG_PATH_CELLFINDER + # read default config as dict + # (a dict, because some paths are derived from input_data_dir) + with open(default_input_config_cellfinder) as cfg: + config_dict = json.load(cfg) + + # modify config: + # - remove url + # - remove data hash + # - add input_data_dir + config_dict["data_url"] = None + config_dict["data_hash"] = None + config_dict["input_data_dir"] = Path.home() / "local_cellfinder_data" + + # instantiate object + config = CellfinderConfig(**config_dict) + + # fetch data from GIN and unzip it into the local input data directory + pooch.retrieve( + url=cellfinder_GIN_data["url"], + known_hash=cellfinder_GIN_data["hash"], + path=Path(config.input_data_dir).parent, # path to download zip to + progressbar=True, + processor=pooch.Unzip( + extract_dir=Path(config.input_data_dir).stem + # path to unzipped dir, *relative* to 'path' + ), + ) + return config @pytest.mark.parametrize( - "input_config", + "input_config, message", [ - "input_data_GIN.json", - "input_data_locally.json", - "input_data_missing_background.json", - "input_data_missing_signal.json", - "input_data_not_locally_or_GIN.json", + ("default_input_config_cellfinder", "Using default config file"), ], ) -def test_read_cellfinder_config(input_config: str, input_configs_dir: Path): +def test_read_cellfinder_config( + input_config: str, + message: str, + caplog: pytest.LogCaptureFixture, + request: pytest.FixtureRequest, +): """Test for reading a cellfinder config file Parameters @@ -52,11 +83,17 @@ def test_read_cellfinder_config(input_config: str, input_configs_dir: Path): input_configs_dir : Path Test data directory path """ - # path to config json file - input_config_path = input_configs_dir / input_config + from brainglobe_workflows.cellfinder_core.cellfinder_core import ( + read_cellfinder_config, + ) + + # instantiate custom logger + _ = setup_logger() + + 
input_config_path = request.getfixturevalue(input_config) # read json as Cellfinder config - config = read_cellfinder_config(input_config_path) + config = read_cellfinder_config(input_config_path, log_on=True) # read json as dict with open(input_config_path) as cfg: @@ -67,37 +104,50 @@ def test_read_cellfinder_config(input_config: str, input_configs_dir: Path): [ky in config.__dataclass_fields__.keys() for ky in config_dict.keys()] ) + # check logs + assert message in caplog.text + + # check all signal files exist + assert config._list_signal_files + assert all([Path(f).is_file() for f in config._list_signal_files]) + + # check all background files exist + assert config._list_background_files + assert all([Path(f).is_file() for f in config._list_background_files]) + + # check output directory exists + assert Path(config._output_path).resolve().is_dir() + + # check output directory name has correct format + out = re.fullmatch( + str(config.output_dir_basename) + "\\d{8}_\\d{6}$", + Path(config._output_path).stem, + ) + assert out is not None + assert out.group() is not None + + # check output file path is as expected + assert ( + Path(config._detected_cells_path) + == Path(config._output_path) / config.detected_cells_filename + ) + +@pytest.mark.skip(reason="focus of PR62") @pytest.mark.parametrize( "input_config, message_pattern", [ ( - "input_data_GIN.json", - "Fetching input data from the provided GIN repository", - ), - ( - "input_data_locally.json", + "config_local", "Fetching input data from the local directories", ), - ( - "input_data_missing_background.json", - "The directory .+ does not exist$", - ), - ("input_data_missing_signal.json", "The directory .+ does not exist$"), - ( - "input_data_not_locally_or_GIN.json", - "Input data not found locally, and URL/hash to " - "GIN repository not provided", - ), ], ) -def test_add_signal_and_background_files( +def test_add_input_paths( caplog: pytest.LogCaptureFixture, - tmp_path: Path, - cellfinder_GIN_data: dict, 
- input_configs_dir: Path, input_config: str, message_pattern: str, + request: pytest.FixtureRequest, ): """Test signal and background files addition to the cellfinder config @@ -105,8 +155,6 @@ def test_add_signal_and_background_files( ---------- caplog : pytest.LogCaptureFixture Pytest fixture to capture the logs during testing - tmp_path : Path - Pytest fixture providing a temporary path for each test cellfinder_GIN_data : dict Dict holding the URL and hash of the cellfinder test data in GIN input_configs_dir : Path @@ -116,53 +164,12 @@ def test_add_signal_and_background_files( message_pattern : str Expected pattern in the log """ - # instantiate our custom logger + + # instantiate custom logger _ = setup_logger() # read json as Cellfinder config - config = read_cellfinder_config(input_configs_dir / input_config) - - # monkeypatch cellfinder config: - # set install_path to pytest temporary directory - config.install_path = tmp_path / config.install_path - - # check lists of signal and background files are not defined - assert not (config.list_signal_files and config.list_background_files) - - # build fullpaths to input data directories - config.signal_dir_path = str( - Path(config.install_path) - / config.data_dir_relative - / config.signal_subdir - ) - config.background_dir_path = str( - Path(config.install_path) - / config.data_dir_relative - / config.background_subdir - ) - - # monkeypatch cellfinder config: - # if config is "local" or "signal/background missing": - # ensure signal and background data from GIN are downloaded locally - if input_config in [ - "input_data_locally.json", - "input_data_missing_signal.json", - "input_data_missing_background.json", - ]: - # fetch data from GIN and download locally - pooch.retrieve( - url=cellfinder_GIN_data["url"], - known_hash=cellfinder_GIN_data["hash"], - path=config.install_path, # path to download zip to - progressbar=True, - processor=pooch.Unzip( - extract_dir=config.data_dir_relative - # path to unzipped 
dir, *relative* to 'path' - ), - ) - - # add signal and background files lists to config - add_signal_and_background_files(config) + _ = request.getfixturevalue(input_config) # check log messages assert len(caplog.messages) > 0 @@ -171,93 +178,10 @@ def test_add_signal_and_background_files( assert out.group() is not None -@pytest.mark.parametrize( - "input_config, message", - [ - ("default_input_config_cellfinder", "Using default config file"), - ("input_config_fetch_GIN", "Input config read from"), - ], -) -def test_setup_workflow( - input_config: str, - message: str, - monkeypatch: pytest.MonkeyPatch, - tmp_path: Path, - caplog: pytest.LogCaptureFixture, - request: pytest.FixtureRequest, -): - """Test setup steps for the cellfinder workflow, using the default config - and passing a specific config file. - - These setup steps include: - - instantiating a CellfinderConfig object using the input json file, - - add the signal and background files to the config if these are not - defined, - - create a timestamped directory for the output of the workflow if - it doesn't exist and add its path to the config - - Parameters - ---------- - input_config : str - Name of input config json file - message : str - Expected log message - monkeypatch : pytest.MonkeyPatch - Pytest fixture to use monkeypatching utils - tmp_path : Path - Pytest fixture providing a temporary path for each test - caplog : pytest.LogCaptureFixture - Pytest fixture to capture the logs during testing - request : pytest.FixtureRequest - Pytest fixture to enable requesting fixtures by name - """ - - # setup logger - _ = setup_logger() - - # monkeypatch to change current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - monkeypatch.chdir(tmp_path) - - # setup workflow - config = setup_workflow(request.getfixturevalue(input_config)) - - # check logs - assert message in caplog.text - - # check all signal files exist - assert config.list_signal_files - assert 
all([Path(f).is_file() for f in config.list_signal_files]) - - # check all background files exist - assert config.list_background_files - assert all([Path(f).is_file() for f in config.list_background_files]) - - # check output directory exists - assert Path(config.output_path).resolve().is_dir() - - # check output directory name has correct format - out = re.fullmatch( - str(config.output_path_basename_relative) + "\\d{8}_\\d{6}$", - Path(config.output_path).stem, - ) - assert out is not None - assert out.group() is not None - - # check output file path - assert ( - Path(config.detected_cells_path) - == Path(config.output_path) / config.detected_cells_filename - ) - - @pytest.mark.parametrize( "input_config", [ "default_input_config_cellfinder", - "input_config_fetch_GIN", - "input_config_fetch_local", ], ) def test_setup( @@ -283,6 +207,13 @@ def test_setup( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ + from brainglobe_workflows.cellfinder_core.cellfinder_core import ( + CellfinderConfig, + ) + from brainglobe_workflows.cellfinder_core.cellfinder_core import ( + setup as setup_full, + ) + # Monkeypatch to change current directory to # pytest temporary directory # (cellfinder cache directory is created in cwd) @@ -304,14 +235,10 @@ def test_setup( "input_config", [ "default_input_config_cellfinder", - "input_config_fetch_GIN", - "input_config_fetch_local", ], ) def test_run_workflow_from_cellfinder_run( input_config: str, - monkeypatch: pytest.MonkeyPatch, - tmp_path: Path, request: pytest.FixtureRequest, ): """Test running cellfinder workflow with default input config @@ -328,10 +255,12 @@ def test_run_workflow_from_cellfinder_run( request : pytest.FixtureRequest Pytest fixture to enable requesting fixtures by name """ - # monkeypatch to change current directory to - # pytest temporary directory - # (cellfinder cache directory is created in cwd) - monkeypatch.chdir(tmp_path) + from 
brainglobe_workflows.cellfinder_core.cellfinder_core import ( + run_workflow_from_cellfinder_run, + ) + from brainglobe_workflows.cellfinder_core.cellfinder_core import ( + setup as setup_full, + ) # run setup cfg = setup_full(str(request.getfixturevalue(input_config))) @@ -340,4 +269,4 @@ def test_run_workflow_from_cellfinder_run( run_workflow_from_cellfinder_run(cfg) # check output files are those expected? - assert Path(cfg.detected_cells_path).is_file() + assert Path(cfg._detected_cells_path).is_file() diff --git a/tests/data/input_data_GIN.json b/tests/data/input_data_GIN.json deleted file mode 100644 index daf056a5..00000000 --- a/tests/data/input_data_GIN.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "install_path": ".cellfinder_workflows", - "data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip", - "data_hash": "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914", - "data_dir_relative": "cellfinder_test_data", - "signal_subdir": "signal", - "background_subdir": "background", - "output_path_basename_relative": "cellfinder_output_", - "detected_cells_filename": "detected_cells.xml", - "voxel_sizes": [ - 5, - 2, - 2 - ], - "start_plane": 0, - "end_plane": -1, - "trained_model": null, - "model_weights": null, - "model": "resnet50_tv", - "batch_size": 32, - "n_free_cpus": 2, - "network_voxel_sizes": [ - 5, - 1, - 1 - ], - "soma_diameter": 16, - "ball_xy_size": 6, - "ball_z_size": 15, - "ball_overlap_fraction": 0.6, - "log_sigma_size": 0.2, - "n_sds_above_mean_thresh": 10, - "soma_spread_factor": 1.4, - "max_cluster_size": 100000, - "cube_width": 50, - "cube_height": 50, - "cube_depth": 20, - "network_depth": "50" -} diff --git a/tests/data/input_data_locally.json b/tests/data/input_data_locally.json deleted file mode 100644 index e3761543..00000000 --- a/tests/data/input_data_locally.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "install_path": ".cellfinder_workflows", - "data_dir_relative": 
"cellfinder_test_data", - "signal_subdir": "signal", - "background_subdir": "background", - "output_path_basename_relative": "cellfinder_output_", - "detected_cells_filename": "detected_cells.xml", - "voxel_sizes": [ - 5, - 2, - 2 - ], - "start_plane": 0, - "end_plane": -1, - "trained_model": null, - "model_weights": null, - "model": "resnet50_tv", - "batch_size": 32, - "n_free_cpus": 2, - "network_voxel_sizes": [ - 5, - 1, - 1 - ], - "soma_diameter": 16, - "ball_xy_size": 6, - "ball_z_size": 15, - "ball_overlap_fraction": 0.6, - "log_sigma_size": 0.2, - "n_sds_above_mean_thresh": 10, - "soma_spread_factor": 1.4, - "max_cluster_size": 100000, - "cube_width": 50, - "cube_height": 50, - "cube_depth": 20, - "network_depth": "50" -} diff --git a/tests/data/input_data_missing_background.json b/tests/data/input_data_missing_background.json deleted file mode 100644 index 52454f9b..00000000 --- a/tests/data/input_data_missing_background.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "install_path": ".cellfinder_workflows", - "data_dir_relative": "cellfinder_test_data", - "signal_subdir": "signal", - "background_subdir": "__", - "output_path_basename_relative": "cellfinder_output_", - "detected_cells_filename": "detected_cells.xml", - "voxel_sizes": [ - 5, - 2, - 2 - ], - "start_plane": 0, - "end_plane": -1, - "trained_model": null, - "model_weights": null, - "model": "resnet50_tv", - "batch_size": 32, - "n_free_cpus": 2, - "network_voxel_sizes": [ - 5, - 1, - 1 - ], - "soma_diameter": 16, - "ball_xy_size": 6, - "ball_z_size": 15, - "ball_overlap_fraction": 0.6, - "log_sigma_size": 0.2, - "n_sds_above_mean_thresh": 10, - "soma_spread_factor": 1.4, - "max_cluster_size": 100000, - "cube_width": 50, - "cube_height": 50, - "cube_depth": 20, - "network_depth": "50" -} diff --git a/tests/data/input_data_missing_signal.json b/tests/data/input_data_missing_signal.json deleted file mode 100644 index 22c5247b..00000000 --- a/tests/data/input_data_missing_signal.json +++ /dev/null @@ -1,37 
+0,0 @@ -{ - "install_path": ".cellfinder_workflows", - "data_dir_relative": "cellfinder_test_data", - "signal_subdir": "__", - "background_subdir": "background", - "output_path_basename_relative": "cellfinder_output_", - "detected_cells_filename": "detected_cells.xml", - "voxel_sizes": [ - 5, - 2, - 2 - ], - "start_plane": 0, - "end_plane": -1, - "trained_model": null, - "model_weights": null, - "model": "resnet50_tv", - "batch_size": 32, - "n_free_cpus": 2, - "network_voxel_sizes": [ - 5, - 1, - 1 - ], - "soma_diameter": 16, - "ball_xy_size": 6, - "ball_z_size": 15, - "ball_overlap_fraction": 0.6, - "log_sigma_size": 0.2, - "n_sds_above_mean_thresh": 10, - "soma_spread_factor": 1.4, - "max_cluster_size": 100000, - "cube_width": 50, - "cube_height": 50, - "cube_depth": 20, - "network_depth": "50" -} diff --git a/tests/data/input_data_not_locally_or_GIN.json b/tests/data/input_data_not_locally_or_GIN.json deleted file mode 100644 index e3761543..00000000 --- a/tests/data/input_data_not_locally_or_GIN.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "install_path": ".cellfinder_workflows", - "data_dir_relative": "cellfinder_test_data", - "signal_subdir": "signal", - "background_subdir": "background", - "output_path_basename_relative": "cellfinder_output_", - "detected_cells_filename": "detected_cells.xml", - "voxel_sizes": [ - 5, - 2, - 2 - ], - "start_plane": 0, - "end_plane": -1, - "trained_model": null, - "model_weights": null, - "model": "resnet50_tv", - "batch_size": 32, - "n_free_cpus": 2, - "network_voxel_sizes": [ - 5, - 1, - 1 - ], - "soma_diameter": 16, - "ball_xy_size": 6, - "ball_z_size": 15, - "ball_overlap_fraction": 0.6, - "log_sigma_size": 0.2, - "n_sds_above_mean_thresh": 10, - "soma_spread_factor": 1.4, - "max_cluster_size": 100000, - "cube_width": 50, - "cube_height": 50, - "cube_depth": 20, - "network_depth": "50" -}