diff --git a/atlas_densities/densities/fitting.py b/atlas_densities/densities/fitting.py index 3595160..b170ace 100644 --- a/atlas_densities/densities/fitting.py +++ b/atlas_densities/densities/fitting.py @@ -147,11 +147,11 @@ def fill_in_homogenous_regions( inhibitory_mask = homogenous_regions["cell_type"] == "inhibitory" for region_name in homogenous_regions[inhibitory_mask]["brain_region"]: - desc_id_set = hierarchy_info.at[region_name, "descendant_id_set"] + desc_id_set = hierarchy_info.at[region_name, "descendant_ids"] id_mask = [id_ in desc_id_set for id_ in hierarchy_info["id"]] for child_region_name in hierarchy_info[id_mask].index: region_mask = np.isin( - annotation, list(hierarchy_info.at[child_region_name, "descendant_id_set"]) + annotation, list(hierarchy_info.at[child_region_name, "descendant_ids"]) ) density = np.mean(neuron_density[region_mask]) densities.at[child_region_name, "inhibitory_neuron"] = density @@ -165,7 +165,7 @@ def fill_in_homogenous_regions( excitatory_mask = homogenous_regions["cell_type"] == "excitatory" for region_name in homogenous_regions[excitatory_mask]["brain_region"]: - desc_id_set = hierarchy_info.at[region_name, "descendant_id_set"] + desc_id_set = hierarchy_info.at[region_name, "descendant_ids"] id_mask = [id_ in desc_id_set for id_ in hierarchy_info["id"]] for child_region_name in hierarchy_info[id_mask].index: densities.at[child_region_name, "inhibitory_neuron"] = 0.0 @@ -232,7 +232,7 @@ def compute_average_intensities( } where each intensity array is of shape (W, H, D) and where the items of each slice list range in [0, W - 1]. - hierarchy_info: data frame with colums "descendant_id_set" (set[int]) and "brain_region" + hierarchy_info: data frame with colums "descendant_ids" (set[int]) and "brain_region" (str) and whose index is a list of region ids. See :fun:`atlas_densities.densities.utils.get_hierarchy_info`. @@ -260,7 +260,7 @@ def compute_average_intensities( region_count, ) for region_name in tqdm(result.index): - region_mask = np.isin(annotation, list(hierarchy_info.at[region_name, "descendant_id_set"])) + region_mask = np.isin(annotation, list(hierarchy_info.at[region_name, "descendant_ids"])) for marker, intensity in gene_marker_volumes.items(): result.at[region_name, marker.lower()] = compute_average_intensity( intensity["intensity"], region_mask, intensity["slices"] diff --git a/atlas_densities/densities/inhibitory_neuron_densities_helper.py b/atlas_densities/densities/inhibitory_neuron_densities_helper.py index 40f8ae0..632c10d 100644 --- a/atlas_densities/densities/inhibitory_neuron_densities_helper.py +++ b/atlas_densities/densities/inhibitory_neuron_densities_helper.py @@ -2,16 +2,12 @@ """ import logging -from typing import List, Optional, Set, Tuple +from typing import List import numpy as np import pandas as pd -from atlas_commons.typing import FloatArray - -from atlas_densities.exceptions import AtlasDensitiesError L = logging.getLogger(__name__) -MinMaxPair = Tuple[float, Optional[float]] def average_densities_to_cell_counts( @@ -34,11 +30,6 @@ def average_densities_to_cell_counts( :func:`atlas_densities.densities.utils.compute_region_volumes`. The volumes to be used are stored in the "volume" column. - - Note: Volumes can refer to volumes of entire regions, including all descendants - subregions, or volumes of single region identifiers, depending on the value of the - option `with_descendants` used when creating `region_volumes`. 
- Returns: data frame containing the cell counts of brain regions and their associated standard deviations. The data frame index is `average_densities.index` (brain region names) and @@ -54,35 +45,6 @@ def average_densities_to_cell_counts( return result -def resize_average_densities( - average_densities: pd.DataFrame, hierarchy_info: pd.DataFrame -) -> pd.DataFrame: - """ - Resize the `average_densities` data frame in such a way that the resized index coincides with - `hierarchy_info["brain_region"]`. - - Missing entries are set to ``np.nan``. - - average_densities: a data frame whose columns are described in - :func:`atlas_densities.densities.fitting.linear_fitting` containing the average - cell densities of brain regions and their associated standard deviations. Columns are - labelled by T and T_standard_deviation for various cell types T. The index of - `average_densities` is a list of region names. - hierarchy_info: data frame returned by - :func:`atlas_densities.densities.utils.get_hierarchy_info`. - - Returns: a data frame containing all the entries of `average_densities` and whose index is - `hierarchy_info["brain_region"]`. New entries are set to ``np.nan``. - - """ - resized = pd.DataFrame( - np.nan, index=hierarchy_info["brain_region"], columns=average_densities.columns - ) - resized.loc[average_densities.index] = average_densities - - return resized - - def get_cell_types(data_frame: pd.DataFrame) -> List[str]: """ Extract cell types from column labels. @@ -96,86 +58,3 @@ def get_cell_types(data_frame: pd.DataFrame) -> List[str]: """ return sorted({column.replace("_standard_deviation", "") for column in data_frame.columns}) - - -def check_region_counts_consistency( - region_counts: pd.DataFrame, hierarchy_info: pd.DataFrame, tolerance: float = 0.0 -) -> None: - """ - Check that cell counts considered as certain are consistent across the brain regions hierarchy. - - Args: - region_counts: data frame returned by - :fun:`densities.inhibitory_densities_helper.average_densities_to_cell_counts`. - A region is understood as a region of the brain hierarchy and includes all descendant - subregions. - hierarchy_info: data frame returned by - :func:`atlas_densities.densities.utils.get_hierarchy_info`. - tolerance: non-negative float number used as tolerance when comparing counts. - Defaults to 0.0. - - Raises: - AtlasDensitiesError if the sum of the cell counts of some leaf regions, all given with - certainty, exceeds the cell count of an ancestor, also given with certainty. - - """ - - def _check_descendants_consistency( - region_counts, hierarchy_info, region_name: str, id_set: Set[int], cell_type: str - ): - if region_counts.at[region_name, f"{cell_type}_standard_deviation"] == 0.0: - count = region_counts.at[region_name, cell_type] - descendants_names = hierarchy_info.loc[list(id_set), "brain_region"] - zero_std_mask = ( - region_counts.loc[descendants_names, f"{cell_type}_standard_deviation"] == 0.0 - ) - mask = zero_std_mask & ( - region_counts.loc[descendants_names, cell_type] > count + tolerance - ) - descendants_counts = region_counts.loc[descendants_names, cell_type][mask] - if not descendants_counts.empty: - raise AtlasDensitiesError( - f"Counts of {cell_type} cells in regions {list(descendants_names)} all exceed " - f"the count of its ancestor region {region_name} and each count is given " - f"for certain. The counts {descendants_counts} are all larger than " - f"{count}." 
- ) - names_with_certainty = descendants_names[zero_std_mask.to_list()].to_list() - leaf_names = [ - hierarchy_info.at[id_, "brain_region"] - for id_ in id_set - if len(hierarchy_info.at[id_, "descendant_id_set"]) == 1 - and hierarchy_info.at[id_, "brain_region"] in names_with_certainty - ] - leaves_count_sum = np.sum(region_counts.loc[leaf_names, cell_type]) - if leaves_count_sum > count + tolerance: - raise AtlasDensitiesError( - f"The sum of the counts of {cell_type} cells in leaf regions", - f" which are given with certainty, exceeds the count of the ancestor" - f" region {region_name}, also given with certainty: " - f"{leaves_count_sum} > {count}.", - ) - - cell_types = get_cell_types(region_counts) - for region_name, id_set in zip( - hierarchy_info["brain_region"], hierarchy_info["descendant_id_set"] - ): - for cell_type in cell_types: - _check_descendants_consistency( - region_counts, hierarchy_info, region_name, id_set, cell_type - ) - - -def replace_inf_with_none(bounds: FloatArray) -> List[MinMaxPair]: - """ - Replace the upper bounds equal to ``np.inf`` by None so as to comply with - the `bounds` interface of scipy.optimize.linprog. - - Args: - bounds: float array of shape (N, 2). Values in `bounds[..., 1]` can be - ``np.inf`` to indicate the absence of a constraining upper bound. - - Return: - list of pairs (min, max) where min is a float and max either a float or None. - """ - return [(float(min_), None if np.isinf(max_) else float(max_)) for min_, max_ in bounds] diff --git a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py index 64d5987..389f2a7 100644 --- a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py +++ b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py @@ -36,10 +36,11 @@ variable deltas used in this module corresponds to the decision variables ``delta_{r, m}`` of the pdf file. """ +from __future__ import annotations import logging import warnings -from typing import Dict, List, Tuple +from typing import Dict, List, Optional, Set, Tuple import numpy as np import pandas as pd @@ -51,10 +52,7 @@ from atlas_densities.densities import utils from atlas_densities.densities.inhibitory_neuron_densities_helper import ( average_densities_to_cell_counts, - check_region_counts_consistency, get_cell_types, - replace_inf_with_none, - resize_average_densities, ) from atlas_densities.exceptions import AtlasDensitiesError, AtlasDensitiesWarning @@ -62,6 +60,114 @@ SKIP = np.inf # Used to signify that a delta variable of the linear program should be removed KEEP = np.nan # Used to signify that a delta variable should be added to the linear program +MinMaxPair = Tuple[float, Optional[float]] + + +def _resize_average_densities( + average_densities: pd.DataFrame, hierarchy_info: pd.DataFrame +) -> pd.DataFrame: + """ + Resize the `average_densities` data frame in such a way that the resized index coincides with + `hierarchy_info["brain_region"]`. + + Missing entries are set to ``np.nan``. + + average_densities: a data frame whose columns are described in + :func:`atlas_densities.densities.fitting.linear_fitting` containing the average + cell densities of brain regions and their associated standard deviations. Columns are + labelled by T and T_standard_deviation for various cell types T. The index of + `average_densities` is a list of region names. + hierarchy_info: data frame returned by + :func:`atlas_densities.densities.utils.get_hierarchy_info`. 
+ + Returns: a data frame containing all the entries of `average_densities` and whose index is + `hierarchy_info["brain_region"]`. New entries are set to ``np.nan``. + + """ + resized = pd.DataFrame( + np.nan, index=hierarchy_info["brain_region"], columns=average_densities.columns + ) + resized.loc[average_densities.index] = average_densities + + return resized + + +def _check_region_counts_consistency( + region_counts: pd.DataFrame, hierarchy_info: pd.DataFrame, tolerance: float = 0.0 +) -> None: + """ + Check that cell counts considered as certain are consistent across the brain regions hierarchy. + + Args: + region_counts: data frame returned by + :fun:`densities.inhibitory_densities_helper.average_densities_to_cell_counts`. + A region is understood as a region of the brain hierarchy and includes all descendant + subregions. + hierarchy_info: data frame returned by + :func:`atlas_densities.densities.utils.get_hierarchy_info`. + tolerance: non-negative float number used as tolerance when comparing counts. + Defaults to 0.0. + + Raises: + AtlasDensitiesError if the sum of the cell counts of some leaf regions, all given with + certainty, exceeds the cell count of an ancestor, also given with certainty. + + """ + + def _check_descendants_consistency(region_name: str, id_set: Set[int], cell_type: str): + standard_deviation = f"{cell_type}_standard_deviation" + + if region_counts.at[region_name, standard_deviation] != 0.0: + return + + # count is certain, since standard_deviation is 0 + count = region_counts.at[region_name, cell_type] + descendants_names = hierarchy_info.loc[list(id_set), "brain_region"] + zero_std_mask = region_counts.loc[descendants_names, standard_deviation] == 0.0 + mask = zero_std_mask & (region_counts.loc[descendants_names, cell_type] > count + tolerance) + descendants_counts = region_counts.loc[descendants_names, cell_type][mask] + if not descendants_counts.empty: + raise AtlasDensitiesError( + f"Counts of {cell_type} cells in regions {list(descendants_names)} all exceed " + f"the count of its ancestor region {region_name} and each count is given " + f"for certain. The counts {descendants_counts} are all larger than " + f"{count}." + ) + + names_with_certainty = descendants_names[zero_std_mask.to_list()].to_list() + leaf_names = [ + hierarchy_info.at[id_, "brain_region"] + for id_ in id_set + if len(hierarchy_info.at[id_, "descendant_ids"]) == 1 + and hierarchy_info.at[id_, "brain_region"] in names_with_certainty + ] + leaves_count_sum = np.sum(region_counts.loc[leaf_names, cell_type]) + if leaves_count_sum > count + tolerance: + raise AtlasDensitiesError( + f"The sum of the counts of {cell_type} cells in leaf regions", + " which are given with certainty, exceeds the count of the ancestor" + f" region {region_name}, also given with certainty: " + f"{leaves_count_sum} > {count}.", + ) + + for _, region_name, id_set in hierarchy_info[["brain_region", "descendant_ids"]].itertuples(): + for cell_type in get_cell_types(region_counts): + _check_descendants_consistency(region_name, id_set, cell_type) + + +def _replace_inf_with_none(bounds: FloatArray) -> List[MinMaxPair]: + """ + Replace the upper bounds equal to ``np.inf`` by None so as to comply with + the `bounds` interface of scipy.optimize.linprog. + + Args: + bounds: float array of shape (N, 2). Values in `bounds[..., 1]` can be + ``np.inf`` to indicate the absence of a constraining upper bound. + + Return: + list of pairs (min, max) where min is a float and max either a float or None. 
+ """ + return [(float(min_), None if np.isinf(max_) else float(max_)) for min_, max_ in bounds] def _compute_region_cell_counts( @@ -69,7 +175,6 @@ def _compute_region_cell_counts( density: FloatArray, voxel_volume: float, hierarchy_info: "pd.DataFrame", - with_descendants: bool = True, ) -> "pd.DataFrame": """ Compute the cell count of every 3D region in `annotation` labeled by a unique @@ -85,10 +190,6 @@ def _compute_region_cell_counts( This is (25 * 1e-6) ** 3 for an AIBS atlas nrrd file with 25um resolution. hierarchy_info: data frame returned by :func:`atlas_densities.densities.utils.get_hierarchy_info`. - with_descendants: if True, a computed cell count refers to the entire 3D region - designated by a region name, including all descendants subregions. Otherwise, the - computed cell count is the cell count of the 3D region labeled by the unique region - identifier. Defaults to True. Returns: DataFrame of the following form (values are fake): @@ -108,13 +209,6 @@ def _compute_region_cell_counts( index=hierarchy_info.index, ) - if with_descendants: - counts = [] - for id_set in tqdm(hierarchy_info["descendant_id_set"]): - count = result.loc[list(id_set), "cell_count"].sum() - counts.append(count) - result["cell_count"] = counts - return result @@ -174,7 +268,7 @@ def _zero_descendants(id_, cell_type, x_result, deltas, hierarchy_info): Zeroes the count of `cell_type` cells in every descendant regions of `id_`, including `id_`. """ - desc_ids = list(hierarchy_info.at[id_, "descendant_id_set"]) + desc_ids = list(hierarchy_info.at[id_, "descendant_ids"]) x_result.loc[desc_ids, cell_type] = 0.0 desc_names = hierarchy_info.loc[desc_ids, "brain_region"] deltas.loc[desc_names, cell_type] = 0.0 @@ -369,9 +463,7 @@ def check_constraint_3e( b_ub = [] variable_count = len(x_map) + len(deltas_map) cell_types = get_cell_types(region_counts) - for id_, region_name, set_ in zip( - hierarchy_info.index, hierarchy_info["brain_region"], hierarchy_info["descendant_id_set"] - ): + for id_, region_name, set_ in hierarchy_info[["brain_region", "descendant_ids"]].itertuples(): for cell_type in cell_types: if (region_name, cell_type) in deltas_map: constraint = np.zeros((variable_count,), dtype=float) @@ -490,9 +582,7 @@ def _compute_initial_cell_counts( L.info("Computing cell count estimates in every 3D region ...") region_counts = average_densities_to_cell_counts(average_densities, volumes) - # Detect cell counts inconsistency between a region and its descendants when - # estimates are deemed as certain. 
- check_region_counts_consistency(region_counts, hierarchy_info) + _check_region_counts_consistency(region_counts, hierarchy_info) L.info("Computing cell count estimates in atomic 3D region ...") volumes.drop("volume", axis=1, inplace=True) @@ -535,7 +625,7 @@ def _check_variables_consistency( """ cell_count_tolerance = 1e-2 # absolute tolerance to rule out round-off errors for region_name, id_, id_set in zip( - deltas.index, hierarchy_info.index, hierarchy_info["descendant_id_set"] + deltas.index, hierarchy_info.index, hierarchy_info["descendant_ids"] ): for cell_type in cell_types: if np.isfinite(deltas.loc[region_name, cell_type]): @@ -662,7 +752,7 @@ def create_inhibitory_neuron_densities( # pylint: disable=too-many-locals """ hierarchy_info = utils.get_hierarchy_info(RegionMap.from_dict(hierarchy), root=region_name) - average_densities = resize_average_densities(average_densities, hierarchy_info) + average_densities = _resize_average_densities(average_densities, hierarchy_info) L.info("Initialization of the linear program: started") region_counts, id_counts = _compute_initial_cell_counts( @@ -671,7 +761,7 @@ def create_inhibitory_neuron_densities( # pylint: disable=too-many-locals L.info("Retrieving overall neuron counts in atomic 3D regions ...") neuron_counts = _compute_region_cell_counts( - annotation, neuron_density, voxel_volume, hierarchy_info, with_descendants=False + annotation, neuron_density, voxel_volume, hierarchy_info ) assert np.all(neuron_counts["cell_count"] >= 0.0) @@ -712,7 +802,7 @@ def create_inhibitory_neuron_densities( # pylint: disable=too-many-locals c=c_row, A_ub=a_ub, b_ub=b_ub, - bounds=replace_inf_with_none(bounds), + bounds=_replace_inf_with_none(bounds), method="highs", ) if not result.success: diff --git a/atlas_densities/densities/measurement_to_density.py b/atlas_densities/densities/measurement_to_density.py index 65170e4..d300304 100644 --- a/atlas_densities/densities/measurement_to_density.py +++ b/atlas_densities/densities/measurement_to_density.py @@ -72,7 +72,7 @@ def compute_region_densities( The index is the sorted list of all region identifiers. """ densities = [] - descendants = hierarchy_info["descendant_id_set"] + descendants = hierarchy_info["descendant_ids"] for iset_ in tqdm(range(len(descendants))): mask = np.isin(annotation, list(descendants.iloc[iset_])) densities.append(np.sum(cell_density[mask]) / np.count_nonzero(mask)) @@ -245,7 +245,7 @@ def cell_count_per_slice_to_density( cell_counts_per_slice = measurements[mask_50um] hierarchy_info = hierarchy_info.set_index("brain_region") for index, row in cell_counts_per_slice.iterrows(): - id_set = hierarchy_info.loc[row["brain_region"], "descendant_id_set"] + id_set = hierarchy_info.loc[row["brain_region"], "descendant_ids"] average_slice_volume = voxel_volume * get_average_voxel_count_per_slice( id_set, annotation, thickness ) diff --git a/atlas_densities/densities/refined_inhibitory_neuron_densities.py b/atlas_densities/densities/refined_inhibitory_neuron_densities.py index 284e9ed..f8552cc 100644 --- a/atlas_densities/densities/refined_inhibitory_neuron_densities.py +++ b/atlas_densities/densities/refined_inhibitory_neuron_densities.py @@ -12,6 +12,7 @@ comparisons with the former approach of "A Cell Atlas for the Mouse Brain" by C. Eroe et al., 2018. 
""" +from __future__ import annotations import copy import logging @@ -24,6 +25,7 @@ from atlas_densities.densities.inhibitory_neuron_densities_helper import ( average_densities_to_cell_counts, + get_cell_types, ) from atlas_densities.densities.utils import compute_region_volumes, get_hierarchy_info @@ -368,7 +370,7 @@ def create_inhibitory_neuron_densities( # pylint: disable=too-many-locals annotation: AnnotationT, voxel_volume: float, neuron_density: FloatArray, - average_densities: "pd.DataFrame", + average_densities: pd.DataFrame, region_name: str = "root", ) -> Dict[str, FloatArray]: """ @@ -428,7 +430,7 @@ def create_inhibitory_neuron_densities( # pylint: disable=too-many-locals # modify cell densities recursively starting from the leaves L.info("Computing region cell counts ...") region_counts = average_densities_to_cell_counts(average_densities, volumes) - cell_types = {column.replace("_standard_deviation", "") for column in region_counts.columns} + cell_types = get_cell_types(region_counts) density_helper = VolumetricDensityHelper( annotation, @@ -436,7 +438,7 @@ def create_inhibitory_neuron_densities( # pylint: disable=too-many-locals neuron_density, region_counts, "gad67+", - list(cell_types - {"gad67+"}), + list(set(cell_types) - {"gad67+"}), ) L.info("Making leaf region cell counts consistent ...") diff --git a/atlas_densities/densities/utils.py b/atlas_densities/densities/utils.py index cd917f9..8648351 100644 --- a/atlas_densities/densities/utils.py +++ b/atlas_densities/densities/utils.py @@ -12,7 +12,6 @@ import scipy.misc import scipy.ndimage from atlas_commons.typing import AnnotationT, BoolArray, FloatArray -from tqdm import tqdm from atlas_densities.exceptions import AtlasDensitiesError, AtlasDensitiesWarning from atlas_densities.utils import copy_array @@ -458,7 +457,7 @@ def get_hierarchy_info( root: str = "Basic cell groups and regions", ) -> "pd.DataFrame": """ - Returns the name and the descendant_id_set of each region that can be found by `region_map`. + Returns the name and the descendant_ids of each region that can be found by `region_map`. Note: We assume that the hierarchy file has unique brain region names. @@ -471,24 +470,22 @@ def get_hierarchy_info( Returns: returns a dataframe with index a list of unique region ids and two columns (values are fake) - descendant_id_set brain_region + descendant_ids brain_region 1 {1, 3} "Cerebellum" 2 {2, 4, 10} "Isocortex" ... ... The index consists in the sorted list of the identifiers of every region recorded in - `region_map` under root. The column `descendant_id_set` holds for each region the set of + `region_map` under root. The column `descendant_ids` holds for each region the set of identifiers of the descendants including the region itself. `brain region` is the list of every region name. 
""" region_ids = list(region_map.find(root, attr="name", with_descendants=True)) region_ids.sort() - descendant_id_sets = [ - region_map.find(id_, attr="id", with_descendants=True) for id_ in region_ids - ] + descendant_ids = [region_map.find(id_, attr="id", with_descendants=True) for id_ in region_ids] region_names = [region_map.get(id_, attr="name") for id_ in region_ids] data_frame = pd.DataFrame( - {"brain_region": region_names, "descendant_id_set": descendant_id_sets}, + {"brain_region": region_names, "descendant_ids": descendant_ids}, index=region_ids, ) @@ -524,21 +521,20 @@ def compute_region_volumes( The latter column is created only if `hierarchy_info` has no `id_set` column, in which case its index is made of unique integer identifiers. """ - id_volumes = [] - for id_ in tqdm(hierarchy_info.index): - id_volumes.append(np.count_nonzero(annotation == id_) * voxel_volume) - result = pd.DataFrame( { "brain_region": hierarchy_info["brain_region"], - "id_volume": id_volumes, + "id_volume": 0.0, }, index=hierarchy_info.index, ) + ids, counts = np.unique(annotation, return_counts=True) + result["id_volume"] = pd.Series(counts * voxel_volume, index=ids) + volumes = [] - for id_ in tqdm(hierarchy_info.index): - id_set = hierarchy_info.loc[id_, "descendant_id_set"] + for id_ in hierarchy_info.index: + id_set = hierarchy_info.loc[id_, "descendant_ids"] volume = result.loc[list(id_set), "id_volume"].sum() volumes.append(volume) result["volume"] = volumes diff --git a/tests/densities/test_inhibitory_neuron_densities_helper.py b/tests/densities/test_inhibitory_neuron_densities_helper.py index f0330d0..b7b352d 100644 --- a/tests/densities/test_inhibitory_neuron_densities_helper.py +++ b/tests/densities/test_inhibitory_neuron_densities_helper.py @@ -98,42 +98,6 @@ def test_average_densities_to_cell_counts_with_reverse_index_order(): ) -def test_resize_average_densities(): - average_densities = pd.DataFrame( - { - "gad67+": [9.0, 12.0], - "gad67+_standard_deviation": [0.9, 1.2], - "pv+": [1.0, 2.0], - "pv+_standard_deviation": [0.1, 0.2], - "sst+": [3.0, 4.0], - "sst+_standard_deviation": [0.3, 0.4], - "vip+": [5.0, 6.0], - "vip+_standard_deviation": [0.5, 0.6], - }, - index=["A", "B"], - ) - hierarchy_info = pd.DataFrame( - {"brain_region": ["A", "B", "C"], "descendant_id_set": [{1}, {2}, {1, 2, 3}]}, - index=[1, 2, 3], - ) - - expected = pd.DataFrame( - { - "gad67+": [9.0, 12.0, np.nan], - "gad67+_standard_deviation": [0.9, 1.2, np.nan], - "pv+": [1.0, 2.0, np.nan], - "pv+_standard_deviation": [0.1, 0.2, np.nan], - "sst+": [3.0, 4.0, np.nan], - "sst+_standard_deviation": [0.3, 0.4, np.nan], - "vip+": [5.0, 6.0, np.nan], - "vip+_standard_deviation": [0.5, 0.6, np.nan], - }, - index=hierarchy_info["brain_region"], - ) - actual = tested.resize_average_densities(average_densities, hierarchy_info) - pdt.assert_frame_equal(actual, expected) - - def test_get_cell_types(): average_densities = pd.DataFrame( { @@ -150,49 +114,3 @@ def test_get_cell_types(): ) assert tested.get_cell_types(average_densities) == ["gad67+", "pv+", "sst+", "vip+"] - - -def test_check_region_counts_consistency(): - region_counts = pd.DataFrame( - { - "gad67+": [9.0, 12.0, 2.0], - "gad67+_standard_deviation": [0.9, 1.2, 1.0], - "pv+": [1.0, 1.0, 1.0], - "pv+_standard_deviation": [0.1, 0.2, 0.3], - "sst+": [3.0, 4.0, 5.0], - "sst+_standard_deviation": [0.3, 0.4, 0.5], - "vip+": [5.0, 6.0, 7.0], - "vip+_standard_deviation": [0.5, 0.6, 0.7], - }, - index=["A", "B", "C"], - ) - hierarchy_info = pd.DataFrame( - {"brain_region": 
["A", "B", "C"], "descendant_id_set": [{1, 2, 3}, {2}, {3}]}, - index=[1, 2, 3], - ) - - # Uncertain or consistent - tested.check_region_counts_consistency(region_counts, hierarchy_info) - region_counts.loc["A", "gad67+_standard_deviation"] = 0.0 - tested.check_region_counts_consistency(region_counts, hierarchy_info) - - # Inconsistent parent-child count inequality - region_counts.loc["B", "gad67+_standard_deviation"] = 0.0 - with pytest.raises(AtlasDensitiesError, match=r"gad67\+.*exceed"): - tested.check_region_counts_consistency(region_counts, hierarchy_info) - - # The sum of the cell counts of the children regions B and C exceeds the count - # of the parent region A. - region_counts.loc["B", "gad67+_standard_deviation"] = 1.2 # restores valid value - region_counts.loc["A", "pv+_standard_deviation"] = 0.0 - region_counts.loc["B", "pv+_standard_deviation"] = 0.0 - region_counts.loc["C", "pv+_standard_deviation"] = 0.0 - with pytest.raises(AtlasDensitiesError, match=r"sum of the counts.*pv\+.*exceeds.*region A"): - tested.check_region_counts_consistency(region_counts, hierarchy_info) - - -def test_replace_inf_with_none(): - bounds = np.array([[0.1, 1.0], [-1.0, np.inf]]) - result = [(0.1, 1.0), (-1.0, None)] - - assert repr(result) == repr(tested.replace_inf_with_none(bounds)) diff --git a/tests/densities/test_inhibitory_neuron_density_optimization.py b/tests/densities/test_inhibitory_neuron_density_optimization.py index c7424a7..ba5276b 100644 --- a/tests/densities/test_inhibitory_neuron_density_optimization.py +++ b/tests/densities/test_inhibitory_neuron_density_optimization.py @@ -13,6 +13,90 @@ from atlas_densities.exceptions import AtlasDensitiesError +def test_resize_average_densities(): + average_densities = pd.DataFrame( + { + "gad67+": [9.0, 12.0], + "gad67+_standard_deviation": [0.9, 1.2], + "pv+": [1.0, 2.0], + "pv+_standard_deviation": [0.1, 0.2], + "sst+": [3.0, 4.0], + "sst+_standard_deviation": [0.3, 0.4], + "vip+": [5.0, 6.0], + "vip+_standard_deviation": [0.5, 0.6], + }, + index=["A", "B"], + ) + hierarchy_info = pd.DataFrame( + { + "brain_region": ["A", "B", "C"], + }, + index=[1, 2, 3], + ) + + expected = pd.DataFrame( + { + "gad67+": [9.0, 12.0, np.nan], + "gad67+_standard_deviation": [0.9, 1.2, np.nan], + "pv+": [1.0, 2.0, np.nan], + "pv+_standard_deviation": [0.1, 0.2, np.nan], + "sst+": [3.0, 4.0, np.nan], + "sst+_standard_deviation": [0.3, 0.4, np.nan], + "vip+": [5.0, 6.0, np.nan], + "vip+_standard_deviation": [0.5, 0.6, np.nan], + }, + index=hierarchy_info["brain_region"], + ) + actual = tested._resize_average_densities(average_densities, hierarchy_info) + pdt.assert_frame_equal(actual, expected) + + +def test_check_region_counts_consistency(): + region_counts = pd.DataFrame( + { + "gad67+": [9.0, 12.0, 2.0], + "gad67+_standard_deviation": [0.9, 1.2, 1.0], + "pv+": [1.0, 1.0, 1.0], + "pv+_standard_deviation": [0.1, 0.2, 0.3], + "sst+": [3.0, 4.0, 5.0], + "sst+_standard_deviation": [0.3, 0.4, 0.5], + "vip+": [5.0, 6.0, 7.0], + "vip+_standard_deviation": [0.5, 0.6, 0.7], + }, + index=["A", "B", "C"], + ) + hierarchy_info = pd.DataFrame( + {"brain_region": ["A", "B", "C"], "descendant_ids": [{1, 2, 3}, {2}, {3}]}, + index=[1, 2, 3], + ) + + # Uncertain or consistent + tested._check_region_counts_consistency(region_counts, hierarchy_info) + region_counts.loc["A", "gad67+_standard_deviation"] = 0.0 + tested._check_region_counts_consistency(region_counts, hierarchy_info) + + # Inconsistent parent-child count inequality + region_counts.loc["B", 
"gad67+_standard_deviation"] = 0.0 + with pytest.raises(AtlasDensitiesError, match=r"gad67\+.*exceed"): + tested._check_region_counts_consistency(region_counts, hierarchy_info) + + # The sum of the cell counts of the children regions B and C exceeds the count + # of the parent region A. + region_counts.loc["B", "gad67+_standard_deviation"] = 1.2 # restores valid value + region_counts.loc["A", "pv+_standard_deviation"] = 0.0 + region_counts.loc["B", "pv+_standard_deviation"] = 0.0 + region_counts.loc["C", "pv+_standard_deviation"] = 0.0 + with pytest.raises(AtlasDensitiesError, match=r"sum of the counts.*pv\+.*exceeds.*region A"): + tested._check_region_counts_consistency(region_counts, hierarchy_info) + + +def test_replace_inf_with_none(): + bounds = np.array([[0.1, 1.0], [-1.0, np.inf]]) + result = [(0.1, 1.0), (-1.0, None)] + + assert repr(result) == repr(tested._replace_inf_with_none(bounds)) + + def test_create_volumetric_densities(): annotation = np.array([[[1, 1, 2]]], dtype=int) neuron_density = np.array([[[0.5, 0.5, 1.0]]]) @@ -342,13 +426,6 @@ def get_hierarchy_info(): "Lobule II, Purkinje layer", "Lobule II, molecular layer", ], - "descendant_id_set": [ - {920, 976, 10708, 10709, 10710}, - {976, 10708, 10709, 10710}, - {10708}, - {10709}, - {10710}, - ], }, index=[920, 976, 10708, 10709, 10710], ) @@ -363,7 +440,7 @@ def test_compute_region_cell_counts(): cell_counts = pd.DataFrame( { "brain_region": hierarchy_info["brain_region"], - "cell_count": 2 * np.array([5.0, 4.0, 1.0, 1.0, 1.0]), + "cell_count": 2 * np.array([1.0, 1.0, 1.0, 1.0, 1.0]), }, index=hierarchy_info.index, ) diff --git a/tests/densities/test_optimization_initialization.py b/tests/densities/test_optimization_initialization.py index abf2540..f15bdc3 100644 --- a/tests/densities/test_optimization_initialization.py +++ b/tests/densities/test_optimization_initialization.py @@ -16,7 +16,7 @@ @pytest.fixture def counts_data_1(): hierarchy_info = pd.DataFrame( - {"brain_region": ["A", "B"], "descendant_id_set": [{1}, {2}]}, index=[1, 2] + {"brain_region": ["A", "B"], "descendant_ids": [{1}, {2}]}, index=[1, 2] ) region_counts = pd.DataFrame( { @@ -162,7 +162,7 @@ def test_set_known_values_3(counts_data_1): def counts_data_2(): hierarchy_info = pd.DataFrame( # A is now a parent region of B - {"brain_region": ["A", "B"], "descendant_id_set": [{1, 2}, {2}]}, + {"brain_region": ["A", "B"], "descendant_ids": [{1, 2}, {2}]}, index=[1, 2], ) region_counts = pd.DataFrame( @@ -335,7 +335,7 @@ def neuron_counts(): def counts_data_3(): hierarchy_info = pd.DataFrame( - {"brain_region": ["A", "B", "C"], "descendant_id_set": [{1, 2, 3}, {2}, {3}]}, + {"brain_region": ["A", "B", "C"], "descendant_ids": [{1, 2, 3}, {2}, {3}]}, index=[1, 2, 3], ) region_counts = pd.DataFrame( @@ -571,7 +571,7 @@ def test_create_bounds_2(): @pytest.fixture def aub_and_bub_data(): hierarchy_info = pd.DataFrame( - {"brain_region": ["A", "B", "C"], "descendant_id_set": [{1, 2, 3}, {2}, {3}]}, + {"brain_region": ["A", "B", "C"], "descendant_ids": [{1, 2, 3}, {2}, {3}]}, index=[1, 2, 3], ) x_map, deltas_map = expected_maps() diff --git a/tests/densities/test_utils.py b/tests/densities/test_utils.py index 5680a7f..a370327 100644 --- a/tests/densities/test_utils.py +++ b/tests/densities/test_utils.py @@ -353,7 +353,7 @@ def get_hierarchy_info(): "Lobule II, Purkinje layer", "Lobule II, molecular layer", ], - "descendant_id_set": [ + "descendant_ids": [ {920, 976, 10708, 10709, 10710}, {976, 10708, 10709, 10710}, {10708},