From 25e1ec6dd6fb2c3c45dd7c2c832037e8168bd021 Mon Sep 17 00:00:00 2001 From: Mike Gevaert Date: Wed, 4 Oct 2023 16:05:47 +0200 Subject: [PATCH] Update densities.utils function locality * put functions that are in densities.utils in the module that uses them, if that is the only module using the function * use __future__ annotations on files touched * ignore useless warnings from nptyping --- atlas_densities/app/cell_densities.py | 42 ++++- atlas_densities/densities/excel_reader.py | 76 ++++++--- atlas_densities/densities/fitting.py | 55 ++++-- ...nhibitory_neuron_densities_optimization.py | 66 +++++++- atlas_densities/densities/utils.py | 156 +----------------- tests/app/test_cell_densities.py | 26 +++ ..._inhibitory_neuron_density_optimization.py | 44 +++++ tests/densities/test_utils.py | 58 +------ tox.ini | 2 +- 9 files changed, 269 insertions(+), 256 deletions(-) diff --git a/atlas_densities/app/cell_densities.py b/atlas_densities/app/cell_densities.py index b54b44d..2bec3d4 100644 --- a/atlas_densities/app/cell_densities.py +++ b/atlas_densities/app/cell_densities.py @@ -55,6 +55,7 @@ log_args, set_verbose, ) +from atlas_commons.typing import FloatArray from voxcell import RegionMap, VoxelData # type: ignore from atlas_densities.app.utils import AD_PATH, DATA_PATH @@ -82,7 +83,6 @@ from atlas_densities.densities.refined_inhibitory_neuron_densities import ( create_inhibitory_neuron_densities as keep_proportions, ) -from atlas_densities.densities.utils import zero_negative_values from atlas_densities.exceptions import AtlasDensitiesError EXCITATORY_SPLIT_CORTEX_ALL_TO_EXC_MTYPES = ( @@ -102,6 +102,42 @@ L = logging.getLogger(__name__) +def _zero_negative_values(array: FloatArray) -> None: + """ + Zero negative values resulting from round-off errors. + + Modifies `array` in place. + + Args: + array: float numpy array. We expect most of the `array` values to be non-negative. + Negative values should be negligible when compared to positive values. + + Raises: + AtlasDensitiesError if the absolute value of the sum of all negative values exceeds + 1 percent of the sum of all positive values, or if the smallest negative value is + not negligible wrt to the mean of the non-negative values. + """ + negative_mask = array < 0.0 + if np.count_nonzero(negative_mask) == 0: + return + + non_negative_mask = np.invert(negative_mask) + + if np.abs(np.sum(array[negative_mask])) / np.sum(array[non_negative_mask]) > 0.01: + raise AtlasDensitiesError( + "The absolute value of the sum of all negative values exceeds" + " 1 percent of the sum of all positive values" + ) + ratio = np.abs(np.min(array[negative_mask])) / np.mean(array[non_negative_mask]) + if not np.isclose(ratio, 0.0, atol=1e-08): + raise AtlasDensitiesError( + "The smallest negative value is not negligible wrt to " + "the mean of all non-negative values." + ) + + array[negative_mask] = 0.0 + + def _get_voxel_volume_in_mm3(voxel_data: "VoxelData") -> float: """ Returns the voxel volume of `voxel_data` in mm^3. @@ -333,7 +369,7 @@ def glia_cell_densities( L.info("Saving overall neuron density to file %s", str(Path(output_dir, "neuron_density.nrrd"))) neuron_density = overall_cell_density.raw - glia_densities["glia"] - zero_negative_values(neuron_density) + _zero_negative_values(neuron_density) annotation.with_data(np.asarray(neuron_density, dtype=float)).save_nrrd( str(Path(output_dir, "neuron_density.nrrd")) ) @@ -466,7 +502,7 @@ def inhibitory_and_excitatory_neuron_densities( str(Path(output_dir, "inhibitory_neuron_density.nrrd")) ) excitatory_neuron_density = neuron_density.raw - inhibitory_neuron_density - zero_negative_values(excitatory_neuron_density) + _zero_negative_values(excitatory_neuron_density) annotation.with_data(np.asarray(excitatory_neuron_density, dtype=float)).save_nrrd( str(Path(output_dir, "excitatory_neuron_density.nrrd")) ) diff --git a/atlas_densities/densities/excel_reader.py b/atlas_densities/densities/excel_reader.py index 72abbf0..9a336b1 100644 --- a/atlas_densities/densities/excel_reader.py +++ b/atlas_densities/densities/excel_reader.py @@ -21,8 +21,8 @@ somatostatin, 'inhibitory neuron' for non-specfic inhibitory neurons) * measurement (float) * standard_deviation (non-negative float) -* measurement_type (str), see MEASUREMENT_TYPES from :mod:`densities.utils` -* measurement_unit (str), see MEASUREMENT_UNITS from :mod:`densities.utils` +* measurement_type (str), see MEASUREMENT_TYPES +* measurement_unit (str), see MEASUREMENT_UNITS * comment (str), a comment on how the measurement has been obtained * source_title (str), the title of the article where the measurement can be extracted * specimen_age (str, e.g., '8 week old', 'P56', '3 month old'), age(s) of the mice used to obtain @@ -36,34 +36,64 @@ Lexicon: AIBS stands for Allen Institute for Brain Science https://alleninstitute.org/what-we-do/brain-science/ """ +from __future__ import annotations import logging from collections import defaultdict -from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union +from typing import TYPE_CHECKING, Optional from warnings import warn import numpy as np import numpy.testing as npt import pandas as pd -from atlas_densities.densities.utils import ( - MEASUREMENT_TYPES, - MEASUREMENT_UNITS, - get_aibs_region_names, -) from atlas_densities.exceptions import AtlasDensitiesWarning if TYPE_CHECKING: # pragma: no cover from pathlib import Path - from pandas import DataFrame from voxcell import RegionMap L = logging.getLogger(__name__) +MEASUREMENT_TYPES = { + # For a given brain region R and a given cell type T: + 0: "cell density", # number of cells of type T per mm^3 in R + 1: "neuron proportion", # number of cells of type T / number of neurons in R + 2: "cell proportion", # number of cells of type T / number of cells in R + 3: "cell count per slice", # number of cells of type T per slice of R, see MEASUREMENT_UNITS +} + +MEASUREMENT_UNITS = { + "cell density": "number of cells per mm^3", + "neuron proportion": "None", + "cell proportion": "None", + "cell count per slice": "number of cells per 5 micrometer-thick slice", +} + + +def _get_aibs_region_names(region_map: "RegionMap") -> set[str]: + """ + Retrieve the names of every region in `region_map`. + + Args: + region_map: RegionMap object to navigate the brain regions hierarchy + instantiated with the 1.json hierarchy file from AIBS. + + Returns: + set of strings containing the names of all regions represented in + `region_map`. + + """ + aibs_region_ids = region_map.find( + "Basic cell groups and regions", attr="name", with_descendants=True + ) + + return {region_map.get(id_, "name") for id_ in aibs_region_ids} + def compute_kim_et_al_neuron_densities( - inhibitory_neuron_densities_path: Union[str, "Path"] + inhibitory_neuron_densities_path: str | "Path", ) -> pd.DataFrame: """ Extract from excel file and average over gender the densities of the cells reacting to @@ -225,7 +255,7 @@ def _fill_in_the_gaps( The same applies to the 'comment' column. Source title blocks and comment blocks do not always coincide. """ - columns: Dict[str, List[str]] = defaultdict(list) + columns: dict[str, list[str]] = defaultdict(list) for start, end in zip(collapsed_blocks.index, collapsed_blocks.index[1:]): columns[header].extend([dataframe[header][start]] * (end - start)) # Handle the tail of the column @@ -242,13 +272,13 @@ def _fill_in_the_gaps( def _set_measurement_type_and_unit_columns( - dataframe: pd.DataFrame, code_filter: Optional[List[int]] = None + dataframe: pd.DataFrame, code_filter: Optional[list[int]] = None ) -> None: """ - Set the values of the 'measurement_type' and 'measurement_unit' columns. + set the values of the 'measurement_type' and 'measurement_unit' columns. The assigment is based on a integer code in the range [0, 3], see MEASUREMENT_TYPES and - MEASUREMENT_UNITS from :mod:`densities.utils`. + MEASUREMENT_UNITS Args: dataframe: DataFrame obtained when reading the worksheets 'GAD67 densities' or 'PV-SST-VIP' @@ -346,8 +376,8 @@ def _enforce_column_types(dataframe: "pd.Dataframe", has_source: bool = True) -> def read_inhibitory_neuron_measurement_compilation( - measurements_path: Union[str, "Path"] -) -> Tuple[pd.DataFrame, Set[str]]: + measurements_path: str | "Path", +) -> tuple[pd.DataFrame, set[str]]: """ Read the neuron densities of the worksheet 'GAD67 densities' in gaba_papers.xlsx @@ -428,7 +458,7 @@ def _stack_pv_sst_vip_measurements(dataframe: pd.DataFrame) -> pd.DataFrame: return result -def read_pv_sst_vip_measurement_compilation(measurements_path: Union[str, "Path"]) -> pd.DataFrame: +def read_pv_sst_vip_measurement_compilation(measurements_path: str | "Path") -> pd.DataFrame: """ Read the neuron densities of the worksheet 'PV-SST-VIP' in gaba_papers.xlsx @@ -471,7 +501,7 @@ def read_pv_sst_vip_measurement_compilation(measurements_path: Union[str, "Path" return pv_sst_vip_neurons_worksheet -def read_homogenous_neuron_type_regions(measurements_path: Union[str, "Path"]) -> pd.DataFrame: +def read_homogenous_neuron_type_regions(measurements_path: str | "Path") -> pd.DataFrame: """ Read the region list of the worksheet 'Full inhibexc regions' in gaba_papers.xlsx @@ -535,7 +565,7 @@ def _replace_accents(region_name: str) -> str: # or 'layer 6b' as in AIBS 1.json. # When such a problematic name is encountered, we remove it and insert its 'layer 6a' and/or # 'layer 6b' counterpart , if they exist. - aibs_region_names = get_aibs_region_names(region_map) + aibs_region_names = _get_aibs_region_names(region_map) invalid_region_names = set(dataframe["full_name"]) - aibs_region_names layer_6_names = set( name for name in aibs_region_names if ("layer 6" in name) or ("Layer 6" in name) @@ -567,7 +597,7 @@ def _replace_accents(region_name: str) -> str: def read_kim_et_al_neuron_densities( - region_map: "RegionMap", inhibitory_neuron_densities_path: Union[str, "Path"] + region_map: "RegionMap", inhibitory_neuron_densities_path: str | "Path" ) -> pd.DataFrame: """ Read the neuron densities of the worksheet 'Sheet 1' in mmc3.xlsx @@ -609,9 +639,9 @@ def read_kim_et_al_neuron_densities( def read_measurements( region_map: "RegionMap", - mmc3_path: Union[str, "Path"], - gaba_papers_path: Union[str, "Path"], - non_density_measurements_path: Union[str, "Path"], + mmc3_path: str | "Path", + gaba_papers_path: str | "Path", + non_density_measurements_path: str | "Path", ) -> pd.DataFrame: """ Read all cell density related measurements from file and returns a unique DataFrame diff --git a/atlas_densities/densities/fitting.py b/atlas_densities/densities/fitting.py index b327a54..5fd5289 100644 --- a/atlas_densities/densities/fitting.py +++ b/atlas_densities/densities/fitting.py @@ -16,10 +16,11 @@ To estimate the average density of a cell type T of a brain region R, we select the linear mapping obtained for T and the group R it belongs to. """ +from __future__ import annotations import logging import warnings -from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union +from typing import TYPE_CHECKING, Dict, List, Optional, Union import numpy as np import pandas as pd @@ -27,7 +28,7 @@ from scipy.optimize import curve_fit from tqdm import tqdm -from atlas_densities.densities.utils import get_group_names, get_hierarchy_info +from atlas_densities.densities import utils from atlas_densities.exceptions import AtlasDensitiesError, AtlasDensitiesWarning if TYPE_CHECKING: # pragma: no cover @@ -39,7 +40,7 @@ def create_dataframe_from_known_densities( - region_names: List[str], + region_names: list[str], average_densities: pd.DataFrame, ) -> pd.DataFrame: """ @@ -107,7 +108,7 @@ def fill_in_homogenous_regions( neuron_density: FloatArray, densities: pd.DataFrame, hierarchy_info: pd.DataFrame, - cell_density_stddevs: Optional[Dict[str, float]] = None, + cell_density_stddevs: Optional[dict[str, float]] = None, ) -> None: """ Fill the average density values of every region where all neurons are either inhibitory or @@ -171,7 +172,7 @@ def fill_in_homogenous_regions( def compute_average_intensity( - intensity: FloatArray, volume_mask: BoolArray, slices: Optional[List[int]] = None + intensity: FloatArray, volume_mask: BoolArray, slices: Optional[list[int]] = None ) -> float: """ Compute the average of `intensity` within the volume defined by `volume_mask`. @@ -230,7 +231,7 @@ def compute_average_intensities( } where each intensity array is of shape (W, H, D) and where the items of each slice list range in [0, W - 1]. - hierarchy_info: data frame with colums "descendant_id_set" (Set[int]) and "brain_region" + hierarchy_info: data frame with colums "descendant_id_set" (set[int]) and "brain_region" (str) and whose index is a list of region ids. See :fun:`atlas_densities.densities.utils.get_hierarchy_info`. @@ -268,8 +269,8 @@ def compute_average_intensities( def linear_fitting_xy( - xdata: List[float], ydata: List[float], sigma: Union[List[float], FloatArray] -) -> Dict[str, float]: + xdata: list[float], ydata: list[float], sigma: Union[list[float], FloatArray] +) -> dict[str, float]: """ Compute the coefficient of the linear least-squares regression of the point cloud (`xdata`, `ydata`) and its standard deviation. @@ -324,7 +325,7 @@ def linear_fitting_xy( def compute_fitting_coefficients( - groups: Dict[str, Set[str]], average_intensities: pd.DataFrame, densities: pd.DataFrame + groups: dict[str, set[str]], average_intensities: pd.DataFrame, densities: pd.DataFrame ) -> FittingData: """ Compute the linear fitting coefficient of the cloud of 2D points (average marker intensity, @@ -391,7 +392,7 @@ def compute_fitting_coefficients( for group_name in groups } - clouds: Dict[str, Dict[str, Dict[str, List[float]]]] = { + clouds: dict[str, dict[str, dict[str, list[float]]]] = { group_name: {cell_type: {"xdata": [], "ydata": [], "sigma": []} for cell_type in cell_types} for group_name in groups } @@ -440,7 +441,7 @@ def compute_fitting_coefficients( def fit_unknown_densities( - groups: Dict[str, Set[str]], + groups: dict[str, set[str]], average_intensities: pd.DataFrame, densities: pd.DataFrame, fitting_coefficients: FittingData, @@ -526,6 +527,32 @@ def _check_average_densities_sanity(average_densities: pd.DataFrame) -> None: ) +def _get_group_names( + region_map: "RegionMap", cleanup_rest: bool = False, root_region_name: str | None = None +) -> dict[str, set[str]]: + """ + Get AIBS names for regions in several region groups of interest. + + Args: + region_map: object to navigate the mouse brain regions hierarchy + (instantiated from AIBS 1.json). + cleanup_rest: (Optional) If True, the name of any ascendant region of the Cerebellum and + Isocortex groups are removed from the names of the Rest group. This makes sure that + the set of names of the Rest group is closed under taking descendants. + + Returns: + A dictionary whose keys are region group names and whose values are + sets of brain region names. + """ + + group_ids = utils.get_group_ids(region_map, cleanup_rest, root_region_name) + + return { + group_name: {region_map.get(id_, attr="name") for id_ in group_ids[group_name]} + for group_name in ["Cerebellum group", "Isocortex group", "Rest"] + } + + def linear_fitting( # pylint: disable=too-many-arguments region_map: "RegionMap", annotation: AnnotationT, @@ -533,7 +560,7 @@ def linear_fitting( # pylint: disable=too-many-arguments gene_marker_volumes: MarkerVolumes, average_densities: pd.DataFrame, homogenous_regions: pd.DataFrame, - cell_density_stddevs: Optional[Dict[str, float]] = None, + cell_density_stddevs: Optional[dict[str, float]] = None, region_name: str = "root", ) -> pd.DataFrame: """ @@ -587,7 +614,7 @@ def linear_fitting( # pylint: disable=too-many-arguments _check_average_densities_sanity(average_densities) _check_homogenous_regions_sanity(homogenous_regions) - hierarchy_info = get_hierarchy_info(region_map, root=region_name) + hierarchy_info = utils.get_hierarchy_info(region_map, root=region_name) L.info("Creating a data frame from known densities ...") densities = create_dataframe_from_known_densities( hierarchy_info["brain_region"].to_list(), average_densities @@ -620,7 +647,7 @@ def linear_fitting( # pylint: disable=too-many-arguments L.info("Getting group names ...") # We want group region names to be stable under taking descendants - groups = get_group_names(region_map, cleanup_rest=True, root_region_name=region_name) + groups = _get_group_names(region_map, cleanup_rest=True, root_region_name=region_name) L.info("Computing fitting coefficients ...") fitting_coefficients = compute_fitting_coefficients(groups, average_intensities, densities) diff --git a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py index 450a3f9..64d5987 100644 --- a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py +++ b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py @@ -48,6 +48,7 @@ from tqdm import tqdm from voxcell import RegionMap +from atlas_densities.densities import utils from atlas_densities.densities.inhibitory_neuron_densities_helper import ( average_densities_to_cell_counts, check_region_counts_consistency, @@ -55,11 +56,6 @@ replace_inf_with_none, resize_average_densities, ) -from atlas_densities.densities.utils import ( - compute_region_cell_counts, - compute_region_volumes, - get_hierarchy_info, -) from atlas_densities.exceptions import AtlasDensitiesError, AtlasDensitiesWarning L = logging.getLogger(__name__) @@ -68,6 +64,60 @@ KEEP = np.nan # Used to signify that a delta variable should be added to the linear program +def _compute_region_cell_counts( + annotation: AnnotationT, + density: FloatArray, + voxel_volume: float, + hierarchy_info: "pd.DataFrame", + with_descendants: bool = True, +) -> "pd.DataFrame": + """ + Compute the cell count of every 3D region in `annotation` labeled by a unique + id in `ids` wrt cell `density`. + + Args: + annotation: int array of shape (W, H, D) holding the annotation of the whole AIBS + mouse brain. (The integers W, H and D are the dimensions of the array). + density: float array of shape `annotation.shape` holding the volumetric density of a + given cell type. The value assigned to a voxel represents the cell density in this + voxel expressed in number of cells per mm^3. + voxel_volume: volume in mm^3 of a voxel in any of the volumetric input arrays. + This is (25 * 1e-6) ** 3 for an AIBS atlas nrrd file with 25um resolution. + hierarchy_info: data frame returned by + :func:`atlas_densities.densities.utils.get_hierarchy_info`. + with_descendants: if True, a computed cell count refers to the entire 3D region + designated by a region name, including all descendants subregions. Otherwise, the + computed cell count is the cell count of the 3D region labeled by the unique region + identifier. Defaults to True. + + Returns: + DataFrame of the following form (values are fake): + brain_region cell_count + 10 Basic cell groups and regions 200.30 + 123 Cerebrum 75.05 + ... ... + The index is the sorted list of all region identifiers. + """ + id_counts = [] + for id_ in tqdm(hierarchy_info.index): + mask = annotation == id_ + id_counts.append(np.sum(density[mask]) * voxel_volume) + + result = pd.DataFrame( + {"brain_region": hierarchy_info["brain_region"], "cell_count": id_counts}, + index=hierarchy_info.index, + ) + + if with_descendants: + counts = [] + for id_set in tqdm(hierarchy_info["descendant_id_set"]): + count = result.loc[list(id_set), "cell_count"].sum() + counts.append(count) + result["cell_count"] = counts + + return result + + def set_known_values( region_counts: pd.DataFrame, id_counts: pd.DataFrame, @@ -435,7 +485,7 @@ def _compute_initial_cell_counts( associated standard deviations (descendant subregions are excluded). """ L.info("Computing the volume of every 3D region ...") - volumes = compute_region_volumes(annotation, voxel_volume, hierarchy_info) + volumes = utils.compute_region_volumes(annotation, voxel_volume, hierarchy_info) L.info("Computing cell count estimates in every 3D region ...") region_counts = average_densities_to_cell_counts(average_densities, volumes) @@ -611,7 +661,7 @@ def create_inhibitory_neuron_densities( # pylint: disable=too-many-locals linear program cannot be solved. """ - hierarchy_info = get_hierarchy_info(RegionMap.from_dict(hierarchy), root=region_name) + hierarchy_info = utils.get_hierarchy_info(RegionMap.from_dict(hierarchy), root=region_name) average_densities = resize_average_densities(average_densities, hierarchy_info) L.info("Initialization of the linear program: started") @@ -620,7 +670,7 @@ def create_inhibitory_neuron_densities( # pylint: disable=too-many-locals ) L.info("Retrieving overall neuron counts in atomic 3D regions ...") - neuron_counts = compute_region_cell_counts( + neuron_counts = _compute_region_cell_counts( annotation, neuron_density, voxel_volume, hierarchy_info, with_descendants=False ) assert np.all(neuron_counts["cell_count"] >= 0.0) diff --git a/atlas_densities/densities/utils.py b/atlas_densities/densities/utils.py index bf91776..f0421a9 100644 --- a/atlas_densities/densities/utils.py +++ b/atlas_densities/densities/utils.py @@ -18,22 +18,6 @@ from voxcell import RegionMap # type: ignore -MEASUREMENT_TYPES = { - # For a given brain region R and a given cell type T: - 0: "cell density", # number of cells of type T per mm^3 in R - 1: "neuron proportion", # number of cells of type T / number of neurons in R - 2: "cell proportion", # number of cells of type T / number of cells in R - 3: "cell count per slice", # number of cells of type T per slice of R, see MEASUREMENT_UNITS -} - -MEASUREMENT_UNITS = { - "cell density": "number of cells per mm^3", - "neuron proportion": "None", - "cell proportion": "None", - "cell count per slice": "number of cells per 5 micrometer-thick slice", -} - - def normalize_intensity( marker_intensity: FloatArray, annotation: AnnotationT, @@ -148,7 +132,7 @@ def compensate_cell_overlap( # pylint: disable=fixme # TODO: Re-assess and underline each density validation criterion. Design an actual optimization # strategy if appropriate. -def optimize_distance_to_line( # pylint: disable=too-many-arguments +def _optimize_distance_to_line( # pylint: disable=too-many-arguments line_direction_vector: FloatArray, upper_bounds: FloatArray, sum_constraint: float, @@ -304,7 +288,7 @@ def constrain_cell_counts_per_voxel( # pylint: disable=too-many-arguments, too- # Find a cell count field respecting all the constraints and which is as close # as possible to the line defined by the input cell counts wrt to Euclidean norm. - cell_counts[complement] = optimize_distance_to_line( + cell_counts[complement] = _optimize_distance_to_line( line_direction_vector, upper_bound, target_sum - max_subsum, @@ -411,32 +395,6 @@ def get_group_ids( return ret -def get_group_names( - region_map: "RegionMap", cleanup_rest: bool = False, root_region_name: str | None = None -) -> dict[str, set[str]]: - """ - Get AIBS names for regions in several region groups of interest. - - Args: - region_map: object to navigate the mouse brain regions hierarchy - (instantiated from AIBS 1.json). - cleanup_rest: (Optional) If True, the name of any ascendant region of the Cerebellum and - Isocortex groups are removed from the names of the Rest group. This makes sure that - the set of names of the Rest group is closed under taking descendants. - - Returns: - A dictionary whose keys are region group names and whose values are - sets of brain region names. - """ - - group_ids = get_group_ids(region_map, cleanup_rest, root_region_name) - - return { - group_name: {region_map.get(id_, attr="name") for id_ in group_ids[group_name]} - for group_name in ["Cerebellum group", "Isocortex group", "Rest"] - } - - def get_region_masks( group_ids: dict[str, set[int]], annotation: FloatArray ) -> dict[str, BoolArray]: @@ -464,26 +422,6 @@ def get_region_masks( } -def get_aibs_region_names(region_map: "RegionMap") -> set[str]: - """ - Retrieve the names of every region in `region_map`. - - Args: - region_map: RegionMap object to navigate the brain regions hierarchy - instantiated with the 1.json hierarchy file from AIBS. - - Returns: - Set of strings containing the names of all regions represented in - `region_map`. - - """ - aibs_region_ids = region_map.find( - "Basic cell groups and regions", attr="name", with_descendants=True - ) - - return {region_map.get(id_, "name") for id_ in aibs_region_ids} - - def get_hierarchy_info( region_map: "RegionMap", root: str = "Basic cell groups and regions", @@ -575,93 +513,3 @@ def compute_region_volumes( result["volume"] = volumes return result - - -def compute_region_cell_counts( - annotation: AnnotationT, - density: FloatArray, - voxel_volume: float, - hierarchy_info: "pd.DataFrame", - with_descendants: bool = True, -) -> "pd.DataFrame": - """ - Compute the cell count of every 3D region in `annotation` labeled by a unique - id in `ids` wrt cell `density`. - - Args: - annotation: int array of shape (W, H, D) holding the annotation of the whole AIBS - mouse brain. (The integers W, H and D are the dimensions of the array). - density: float array of shape `annotation.shape` holding the volumetric density of a - given cell type. The value assigned to a voxel represents the cell density in this - voxel expressed in number of cells per mm^3. - voxel_volume: volume in mm^3 of a voxel in any of the volumetric input arrays. - This is (25 * 1e-6) ** 3 for an AIBS atlas nrrd file with 25um resolution. - hierarchy_info: data frame returned by - :func:`atlas_densities.densities.utils.get_hierarchy_info`. - with_descendants: if True, a computed cell count refers to the entire 3D region - designated by a region name, including all descendants subregions. Otherwise, the - computed cell count is the cell count of the 3D region labeled by the unique region - identifier. Defaults to True. - - Returns: - DataFrame of the following form (values are fake): - brain_region cell_count - 10 Basic cell groups and regions 200.30 - 123 Cerebrum 75.05 - ... ... - The index is the sorted list of all region identifiers. - """ - id_counts = [] - for id_ in tqdm(hierarchy_info.index): - mask = annotation == id_ - id_counts.append(np.sum(density[mask]) * voxel_volume) - - result = pd.DataFrame( - {"brain_region": hierarchy_info["brain_region"], "cell_count": id_counts}, - index=hierarchy_info.index, - ) - - if with_descendants: - counts = [] - for id_set in tqdm(hierarchy_info["descendant_id_set"]): - count = result.loc[list(id_set), "cell_count"].sum() - counts.append(count) - result["cell_count"] = counts - - return result - - -def zero_negative_values(array: FloatArray) -> None: - """ - Zero negative values resulting from round-off errors. - - Modifies `array` in place. - - Args: - array: float numpy array. We expect most of the `array` values to be non-negative. - Negative values should be negligible when compared to positive values. - - Raises: - AtlasDensitiesError if the absolute value of the sum of all negative values exceeds - 1 percent of the sum of all positive values, or if the smallest negative value is - not negligible wrt to the mean of the non-negative values. - """ - negative_mask = array < 0.0 - if np.count_nonzero(negative_mask) == 0: - return - - non_negative_mask = np.invert(negative_mask) - - if np.abs(np.sum(array[negative_mask])) / np.sum(array[non_negative_mask]) > 0.01: - raise AtlasDensitiesError( - "The absolute value of the sum of all negative values exceeds" - " 1 percent of the sum of all positive values" - ) - ratio = np.abs(np.min(array[negative_mask])) / np.mean(array[non_negative_mask]) - if not np.isclose(ratio, 0.0, atol=1e-08): - raise AtlasDensitiesError( - "The smallest negative value is not negligible wrt to " - "the mean of all non-negative values." - ) - - array[negative_mask] = 0.0 diff --git a/tests/app/test_cell_densities.py b/tests/app/test_cell_densities.py index a922812..5fc793f 100644 --- a/tests/app/test_cell_densities.py +++ b/tests/app/test_cell_densities.py @@ -6,6 +6,7 @@ import numpy.testing as npt import pandas as pd import pandas.testing as pdt +import pytest import yaml # type: ignore from click.testing import CliRunner from voxcell import VoxelData # type: ignore @@ -17,6 +18,7 @@ inhibitory_data, ) from atlas_densities.densities.measurement_to_density import remove_non_density_measurements +from atlas_densities.exceptions import AtlasDensitiesError from tests.densities.test_excel_reader import ( check_columns_na, check_non_negative_values, @@ -400,3 +402,27 @@ def test_fit_average_densities(): ) result = _get_fitting_result(runner) assert "Negative density value" in str(result.exception) + + +def test_zero_negative_values(): + array = np.array([0, 1, -0.02], dtype=float) + with pytest.raises( + AtlasDensitiesError, + match="absolute value of the sum of all negative values exceeds 1 percent of the sum of all positive values", + ): + tested._zero_negative_values(array) + + array = np.array([0, 1, -0.01], dtype=float) + with pytest.raises( + AtlasDensitiesError, + match="smallest negative value is not negligible wrt to the mean of all non-negative values", + ): + tested._zero_negative_values(array) + + array = np.array([0, 1, -1e-8 / 2.0], dtype=float) + tested._zero_negative_values(array) + npt.assert_array_almost_equal(array, np.array([0, 1, 0], dtype=float)) + + array = np.array([0, 1, 1], dtype=float) + tested._zero_negative_values(array) + npt.assert_array_almost_equal(array, np.array([0, 1, 1], dtype=float)) diff --git a/tests/densities/test_inhibitory_neuron_density_optimization.py b/tests/densities/test_inhibitory_neuron_density_optimization.py index 11985fc..9347f6d 100644 --- a/tests/densities/test_inhibitory_neuron_density_optimization.py +++ b/tests/densities/test_inhibitory_neuron_density_optimization.py @@ -329,3 +329,47 @@ def test_create_inhibitory_neuron_densities(): assert np.all(densities["gad67+"] <= data["neuron_density"]) assert np.all(sum_ <= data["neuron_density"]) assert np.all(sum_ <= densities["gad67+"] + 1e-6) + + +def get_hierarchy_info(): + return pd.DataFrame( + { + "brain_region": [ + "Central lobule", + "Lobule II", + "Lobule II, granular layer", + "Lobule II, Purkinje layer", + "Lobule II, molecular layer", + ], + "descendant_id_set": [ + {920, 976, 10708, 10709, 10710}, + {976, 10708, 10709, 10710}, + {10708}, + {10709}, + {10710}, + ], + }, + index=[920, 976, 10708, 10709, 10710], + ) + + +def test_compute_region_cell_counts(): + annotation = np.array([[[920, 10710, 10710], [10709, 10708, 976], [10708, 10710, 10709]]]) + cell_density = np.array([[[1.0, 1.0 / 3.0, 1.0 / 3.0], [0.5, 0.5, 1.0], [0.5, 1.0 / 3.0, 0.5]]]) + voxel_volume = 2.0 + + hierarchy_info = get_hierarchy_info() + cell_counts = pd.DataFrame( + { + "brain_region": hierarchy_info["brain_region"], + "cell_count": 2 * np.array([5.0, 4.0, 1.0, 1.0, 1.0]), + }, + index=hierarchy_info.index, + ) + + pdt.assert_frame_equal( + cell_counts, # expected + tested._compute_region_cell_counts( + annotation, cell_density, voxel_volume, hierarchy_info=get_hierarchy_info() + ), + ) diff --git a/tests/densities/test_utils.py b/tests/densities/test_utils.py index fdea383..7be242d 100644 --- a/tests/densities/test_utils.py +++ b/tests/densities/test_utils.py @@ -132,21 +132,21 @@ def test_get_region_masks(): def test_optimize_distance_to_line_2D(): line_direction_vector = np.array([1, 2], dtype=float) upper_bounds = np.array([3, 1], dtype=float) - optimum = tested.optimize_distance_to_line( + optimum = tested._optimize_distance_to_line( line_direction_vector, upper_bounds, 3.0, threshold=1e-7, copy=True ) npt.assert_array_almost_equal(optimum, np.array([2.0, 1.0])) line_direction_vector = np.array([2, 3], dtype=float) upper_bounds = np.array([1, 3], dtype=float) - optimum = tested.optimize_distance_to_line( + optimum = tested._optimize_distance_to_line( line_direction_vector, upper_bounds, 2.0, threshold=1e-7, copy=False ) npt.assert_array_almost_equal(optimum, np.array([0.8, 1.2])) line_direction_vector = np.array([1, 2], dtype=float) upper_bounds = np.array([3, 1], dtype=float) - optimum = tested.optimize_distance_to_line( + optimum = tested._optimize_distance_to_line( line_direction_vector, upper_bounds, 5.0, threshold=1e-7 ) npt.assert_array_almost_equal(optimum, np.array([3.0, 1.0])) @@ -155,7 +155,7 @@ def test_optimize_distance_to_line_2D(): def test_optimize_distance_to_line_3D(): line_direction_vector = np.array([0.5, 2.0, 1.0], dtype=float) upper_bounds = np.array([1.0, 1.0, 1.0], dtype=float) - optimum = tested.optimize_distance_to_line( + optimum = tested._optimize_distance_to_line( line_direction_vector, upper_bounds, 2.0, threshold=1e-7, copy=True ) npt.assert_array_almost_equal(optimum, np.array([1.0 / 3.0, 1.0, 2.0 / 3.0])) @@ -251,7 +251,7 @@ def test_constrain_cell_counts_per_voxel_exceptions(): zero_cell_counts_mask = np.array([[[True, True, False, False, False]]]) max_cell_counts_mask = np.array([[[False, False, True, False, False]]]) with patch( - "atlas_densities.densities.utils.optimize_distance_to_line", + "atlas_densities.densities.utils._optimize_distance_to_line", return_value=cell_counts, ): tested.constrain_cell_counts_per_voxel( @@ -371,51 +371,3 @@ def test_compute_region_volumes(volumes, annotation): annotation, voxel_volume=2.0, hierarchy_info=get_hierarchy_info() ), ) - - -@pytest.fixture -def cell_counts(voxel_volume=2): - counts = voxel_volume * np.array([5.0, 4.0, 1.0, 1.0, 1.0]) - hierarchy_info = get_hierarchy_info() - return pd.DataFrame( - {"brain_region": hierarchy_info["brain_region"], "cell_count": counts}, - index=hierarchy_info.index, - ) - - -@pytest.fixture -def cell_density(): - return np.array([[[1.0, 1.0 / 3.0, 1.0 / 3.0], [0.5, 0.5, 1.0], [0.5, 1.0 / 3.0, 0.5]]]) - - -def test_compute_cell_counts(annotation, cell_density, cell_counts): - pdt.assert_frame_equal( - cell_counts, # expected - tested.compute_region_cell_counts( - annotation, cell_density, voxel_volume=2.0, hierarchy_info=get_hierarchy_info() - ), - ) - - -def test_zero_negative_values(): - array = np.array([0, 1, -0.02], dtype=float) - with pytest.raises( - AtlasDensitiesError, - match="absolute value of the sum of all negative values exceeds 1 percent of the sum of all positive values", - ): - tested.zero_negative_values(array) - - array = np.array([0, 1, -0.01], dtype=float) - with pytest.raises( - AtlasDensitiesError, - match="smallest negative value is not negligible wrt to the mean of all non-negative values", - ): - tested.zero_negative_values(array) - - array = np.array([0, 1, -1e-8 / 2.0], dtype=float) - tested.zero_negative_values(array) - npt.assert_array_almost_equal(array, np.array([0, 1, 0], dtype=float)) - - array = np.array([0, 1, 1], dtype=float) - tested.zero_negative_values(array) - npt.assert_array_almost_equal(array, np.array([0, 1, 1], dtype=float)) diff --git a/tox.ini b/tox.ini index ede8681..c795446 100644 --- a/tox.ini +++ b/tox.ini @@ -5,7 +5,7 @@ black_version = 22.0 [testenv] extras = tests passenv = PYTHONPATH -commands = pytest tests {posargs} +commands = pytest -W "ignore::DeprecationWarning:nptyping.typing_" tests {posargs} [testenv:lint] passenv = PYTHONPATH