Skip to content

Commit

Permalink
fix output_path with timestamp error. added function to generate conf…
Browse files Browse the repository at this point in the history
…igs on desired cache dirs.
  • Loading branch information
sfmig committed Oct 4, 2023
1 parent abd9e4c commit 5555fba
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 170 deletions.
108 changes: 57 additions & 51 deletions brainglobe_workflows/cellfinder/cellfinder_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,47 +29,47 @@
logger = logging.getLogger(__name__)

# Default config
DATA_URL = "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip"
DATA_HASH = "b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914"
CELLFINDER_CACHE_DIR = Path.home() / ".cellfinder_benchmarks"

DEFAULT_CONFIG_DICT = {
"install_path": CELLFINDER_CACHE_DIR,
"data_url": DATA_URL,
"data_hash": DATA_HASH,
"local_path": CELLFINDER_CACHE_DIR / "cellfinder_test_data",
"signal_parent_dir": str(
CELLFINDER_CACHE_DIR / "cellfinder_test_data" / "signal"
),
"background_parent_dir": str(
CELLFINDER_CACHE_DIR / "cellfinder_test_data" / "background"
),
"output_path": CELLFINDER_CACHE_DIR / "cellfinder_output",
"detected_cells_filepath": (
CELLFINDER_CACHE_DIR / "cellfinder_output" / "detected_cells.xml"
),
"voxel_sizes": [5, 2, 2], # microns
"start_plane": 0,
"end_plane": -1,
"trained_model": None, # if None, it will use a default model
"model_weights": None,
"model": "resnet50_tv",
"batch_size": 32,
"n_free_cpus": 2,
"network_voxel_sizes": [5, 1, 1],
"soma_diameter": 16,
"ball_xy_size": 6,
"ball_z_size": 15,
"ball_overlap_fraction": 0.6,
"log_sigma_size": 0.2,
"n_sds_above_mean_thresh": 10,
"soma_spread_factor": 1.4,
"max_cluster_size": 100000,
"cube_width": 50,
"cube_height": 50,
"cube_depth": 20,
"network_depth": "50",
}
CELLFINDER_CACHE_DIR = Path.home() / ".cellfinder_workflows"


def default_config_dict(CELLFINDER_CACHE_DIR):
return {
"install_path": CELLFINDER_CACHE_DIR,
"data_url": "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip",
"data_hash": (
"b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914"
),
"local_path": CELLFINDER_CACHE_DIR / "cellfinder_test_data",
"signal_parent_dir": str(
CELLFINDER_CACHE_DIR / "cellfinder_test_data" / "signal"
),
"background_parent_dir": str(
CELLFINDER_CACHE_DIR / "cellfinder_test_data" / "background"
),
"output_path_basename": CELLFINDER_CACHE_DIR / "cellfinder_output_",
"detected_cells_filename": "detected_cells.xml",
"voxel_sizes": [5, 2, 2], # microns
"start_plane": 0,
"end_plane": -1,
"trained_model": None, # if None, it will use a default model
"model_weights": None,
"model": "resnet50_tv",
"batch_size": 32,
"n_free_cpus": 2,
"network_voxel_sizes": [5, 1, 1],
"soma_diameter": 16,
"ball_xy_size": 6,
"ball_z_size": 15,
"ball_overlap_fraction": 0.6,
"log_sigma_size": 0.2,
"n_sds_above_mean_thresh": 10,
"soma_spread_factor": 1.4,
"max_cluster_size": 100000,
"cube_width": 50,
"cube_height": 50,
"cube_depth": 20,
"network_depth": "50",
}


@dataclass
Expand All @@ -90,8 +90,8 @@ class CellfinderConfig:
local_path: Pathlike
signal_parent_dir: str
background_parent_dir: str
output_path: Pathlike
detected_cells_filepath: Pathlike
output_path_basename: Pathlike
detected_cells_filename: Pathlike

# preprocessing parameters
voxel_sizes: Tuple[float, float, float]
Expand Down Expand Up @@ -120,6 +120,7 @@ class CellfinderConfig:

list_signal_files: Optional[list] = None
list_background_files: Optional[list] = None
output_path: Optional[Pathlike] = None


def example_cellfinder_script():
Expand Down Expand Up @@ -159,21 +160,23 @@ def run_workflow_from_cellfinder_run(cfg):
)

# Save results to xml file
save_cells(detected_cells, cfg.detected_cells_filepath)
save_cells(detected_cells, cfg.output_path / cfg.detected_cells_filename)


def setup_workflow():
def setup_workflow(cellfinder_cache_dir=CELLFINDER_CACHE_DIR):
"""Prepare configuration to run workflow
This includes
- instantiating the config dictionary,
- checking if the input data exists locally, and fetching from
GIN repository otherwise,
- creating the directory for the output of the workflow if it doesn't exist
- creating a timestamped directory for the output of the workflow if
it doesn't exist and adding it to the config
To instantiate the config dictionary, we first check if an environment
variable "CELLFINDER_CONFIG_PATH" pointing to a config json file exists.
If not, the default config (DEFAULT_CONFIG_DICT) is used.
Returns
-------
config : CellfinderConfig
Expand All @@ -182,7 +185,7 @@ def setup_workflow():
"""

# Define config
# if environment variable defined
# if environment variable defined, that prevails
if "CELLFINDER_CONFIG_PATH" in os.environ.keys():
input_config_path = Path(os.environ["CELLFINDER_CONFIG_PATH"])
assert input_config_path.exists()
Expand All @@ -198,9 +201,9 @@ def setup_workflow():
"Configuration retrieved from "
f'{os.environ["CELLFINDER_CONFIG_PATH"]}'
)
# else use the default config
# else use the default config, with the cellfinder cache directory provided
else:
config = CellfinderConfig(**DEFAULT_CONFIG_DICT)
config = CellfinderConfig(**default_config_dict(cellfinder_cache_dir))
logger.info("Using default configuration")

# Retrieve and add lists of input data to config if neither are defined
Expand All @@ -210,9 +213,12 @@ def setup_workflow():
# Create output directory if it doesn't exist, timestamped
timestamp = datetime.datetime.now()
timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S")
(Path(str(config.output_path) + "_" + timestamp_formatted)).mkdir(
parents=True, exist_ok=True
output_path_timestamped = Path(
str(config.output_path) + timestamp_formatted
)
output_path_timestamped.mkdir(parents=True, exist_ok=True)
# add to config
config.output_path = output_path_timestamped

return config

Expand Down
164 changes: 45 additions & 119 deletions tests/test_integration/test_cellfinder_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pytest

from brainglobe_workflows.cellfinder.cellfinder_main import (
default_config_dict,
run_workflow_from_cellfinder_run,
setup_workflow,
)
Expand All @@ -26,58 +27,7 @@ def cellfinder_cache_dir(tmp_path):


@pytest.fixture()
def config_from_dict(cellfinder_cache_dir):
DATA_URL = "https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/cellfinder-test-data.zip"
DATA_HASH = (
"b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914"
)
CELLFINDER_CACHE_DIR = cellfinder_cache_dir

workflow_config = {
"install_path": CELLFINDER_CACHE_DIR,
"data_url": DATA_URL,
"data_hash": DATA_HASH,
"local_path": CELLFINDER_CACHE_DIR / "cellfinder_test_data",
"signal_parent_dir": str(
CELLFINDER_CACHE_DIR / "cellfinder_test_data" / "signal"
),
"background_parent_dir": str(
CELLFINDER_CACHE_DIR / "cellfinder_test_data" / "background"
),
"output_path": CELLFINDER_CACHE_DIR / "cellfinder_output",
"detected_cells_filepath": (
CELLFINDER_CACHE_DIR / "cellfinder_output" / "detected_cells.xml"
),
"voxel_sizes": [5, 2, 2], # microns
"start_plane": 0,
"end_plane": -1,
"trained_model": None, # if None, it will use a default model
"model_weights": None,
"model": "resnet50_tv",
"batch_size": 32,
"n_free_cpus": 2,
"network_voxel_sizes": [5, 1, 1],
"soma_diameter": 16,
"ball_xy_size": 6,
"ball_z_size": 15,
"ball_overlap_fraction": 0.6,
"log_sigma_size": 0.2,
"n_sds_above_mean_thresh": 10,
"soma_spread_factor": 1.4,
"max_cluster_size": 100000,
"cube_width": 50,
"cube_height": 50,
"cube_depth": 20,
"network_depth": "50",
}
return workflow_config


@pytest.fixture()
def config_from_env_var(tmp_path, config_from_dict):
# create a temp json file to dump config data
input_config_path = tmp_path / "input_config.json"

def config_from_env_var(tmp_path, cellfinder_cache_dir):
# dump config from fixture into a json file
# ensure all Paths are JSON serializable
def prep_json(obj):
Expand All @@ -86,8 +36,17 @@ def prep_json(obj):
else:
return json.JSONEncoder.default(obj)

# create a temp json file to dump config data
input_config_path = tmp_path / "input_config.json"

# alter config if required by the test
# - missing signal directory
# - missing background directory
config_dict = default_config_dict(cellfinder_cache_dir)

# dump config into json
with open(input_config_path, "w") as js:
json.dump(config_from_dict, js, default=prep_json)
json.dump(config_dict, js, default=prep_json)

# define environment variable pointing to this json file
os.environ["CELLFINDER_CONFIG_PATH"] = str(input_config_path)
Expand All @@ -97,23 +56,7 @@ def prep_json(obj):
del os.environ["CELLFINDER_CONFIG_PATH"]


def test_run_with_predefined_default_config(
config_from_dict, caplog, logger_str
):
# run setup and workflow
with caplog.at_level(
logging.DEBUG, logger=logger_str
): # temporarily sets the log level for the given logger
cfg = setup_workflow(config_from_dict)
run_workflow_from_cellfinder_run(cfg)

# check log
assert "Using default configuration" in caplog.messages


def test_run_with_env_var_defined_config(
config_from_env_var, caplog, logger_str
):
def test_run_with_env_var_config(config_from_env_var, caplog, logger_str):
# check environment variable exists
assert "CELLFINDER_CONFIG_PATH" in os.environ.keys()

Expand All @@ -127,64 +70,47 @@ def test_run_with_env_var_defined_config(
"Configuration retrieved from "
f'{os.environ["CELLFINDER_CONFIG_PATH"]}' in caplog.messages
)


def test_setup_with_missing_signal_data(config_from_dict, caplog, logger_str):
# check neither signal or background data exist locally,
assert not Path(config_from_dict["signal_parent_dir"]).exists()
assert not Path(config_from_dict["background_parent_dir"]).exists()

# create a directory for the background only
Path(config_from_dict["background_parent_dir"]).mkdir(
parents=True, exist_ok=True
)

# run setup
# context manager temporarily sets the log level for the given logger
with caplog.at_level(logging.DEBUG, logger=logger_str):
cfg = setup_workflow(config_from_dict)

# check log-- when run as a suite, both directories exist already?
assert (
f"The directory {cfg.signal_parent_dir} "
"does not exist" in caplog.messages
)


def test_setup_with_missing_background_data(
config_from_dict, caplog, logger_str
):
# check neither signal or background data exist locally,
assert not Path(config_from_dict["signal_parent_dir"]).exists()
assert not Path(config_from_dict["background_parent_dir"]).exists()

# create a directory for the signal, but not for the background
Path(config_from_dict["signal_parent_dir"]).mkdir(
parents=True, exist_ok=True
)

# run setup
with caplog.at_level(logging.DEBUG, logger=logger_str):
cfg = setup_workflow(config_from_dict)

# check log
assert (
f"The directory {cfg.background_parent_dir} "
"does not exist" in caplog.messages
"Fetching input data from the "
"provided GIN repository" in caplog.messages
)


def test_setup_fetching_from_GIN(config_from_dict, caplog, logger_str):
# check neither signal or background data exist locally before setup
assert not Path(config_from_dict["signal_parent_dir"]).exists()
assert not Path(config_from_dict["background_parent_dir"]).exists()
def test_run_with_default_config(cellfinder_cache_dir, caplog, logger_str):
# check environment var is not defined
assert "CELLFINDER_CONFIG_PATH" not in os.environ.keys()

# run setup
# run setup and workflow
with caplog.at_level(logging.DEBUG, logger=logger_str):
setup_workflow(config_from_dict)
cfg = setup_workflow(cellfinder_cache_dir)
run_workflow_from_cellfinder_run(cfg)

# check log
assert "Using default configuration" in caplog.messages
assert (
"Fetching input data from the "
"provided GIN repository" in caplog.messages
)


# test running on local data?
# def test_run_with_default_config_local(
# caplog, logger_str
# ):
# # check environment var is not defined
# assert "CELLFINDER_CONFIG_PATH" not in os.environ.keys()

# # run setup and workflow
# # do not pass a cellfinder_cache_dir location to use default
# with caplog.at_level(
# logging.DEBUG, logger=logger_str
# ):
# cfg = setup_workflow()
# run_workflow_from_cellfinder_run(cfg)

# # check log
# assert "Using default configuration" in caplog.messages
# assert (
# "Fetching input data from the "
# "local directories" in caplog.messages
# )

0 comments on commit 5555fba

Please sign in to comment.