From 08e2570fffbacebf3b60c19b2704fc199b6fe285 Mon Sep 17 00:00:00 2001 From: Mike Gevaert Date: Wed, 14 Feb 2024 13:55:18 +0100 Subject: [PATCH] rename descendant_id_set -> descendant_ids start _check_descendants_consistency simplification --- atlas_densities/densities/fitting.py | 10 +-- ...nhibitory_neuron_densities_optimization.py | 87 +++++++++---------- .../densities/measurement_to_density.py | 4 +- atlas_densities/densities/utils.py | 14 ++- ..._inhibitory_neuron_density_optimization.py | 6 +- .../test_optimization_initialization.py | 8 +- tests/densities/test_utils.py | 2 +- 7 files changed, 61 insertions(+), 70 deletions(-) diff --git a/atlas_densities/densities/fitting.py b/atlas_densities/densities/fitting.py index 3595160..b170ace 100644 --- a/atlas_densities/densities/fitting.py +++ b/atlas_densities/densities/fitting.py @@ -147,11 +147,11 @@ def fill_in_homogenous_regions( inhibitory_mask = homogenous_regions["cell_type"] == "inhibitory" for region_name in homogenous_regions[inhibitory_mask]["brain_region"]: - desc_id_set = hierarchy_info.at[region_name, "descendant_id_set"] + desc_id_set = hierarchy_info.at[region_name, "descendant_ids"] id_mask = [id_ in desc_id_set for id_ in hierarchy_info["id"]] for child_region_name in hierarchy_info[id_mask].index: region_mask = np.isin( - annotation, list(hierarchy_info.at[child_region_name, "descendant_id_set"]) + annotation, list(hierarchy_info.at[child_region_name, "descendant_ids"]) ) density = np.mean(neuron_density[region_mask]) densities.at[child_region_name, "inhibitory_neuron"] = density @@ -165,7 +165,7 @@ def fill_in_homogenous_regions( excitatory_mask = homogenous_regions["cell_type"] == "excitatory" for region_name in homogenous_regions[excitatory_mask]["brain_region"]: - desc_id_set = hierarchy_info.at[region_name, "descendant_id_set"] + desc_id_set = hierarchy_info.at[region_name, "descendant_ids"] id_mask = [id_ in desc_id_set for id_ in hierarchy_info["id"]] for child_region_name in hierarchy_info[id_mask].index: densities.at[child_region_name, "inhibitory_neuron"] = 0.0 @@ -232,7 +232,7 @@ def compute_average_intensities( } where each intensity array is of shape (W, H, D) and where the items of each slice list range in [0, W - 1]. - hierarchy_info: data frame with colums "descendant_id_set" (set[int]) and "brain_region" + hierarchy_info: data frame with colums "descendant_ids" (set[int]) and "brain_region" (str) and whose index is a list of region ids. See :fun:`atlas_densities.densities.utils.get_hierarchy_info`. @@ -260,7 +260,7 @@ def compute_average_intensities( region_count, ) for region_name in tqdm(result.index): - region_mask = np.isin(annotation, list(hierarchy_info.at[region_name, "descendant_id_set"])) + region_mask = np.isin(annotation, list(hierarchy_info.at[region_name, "descendant_ids"])) for marker, intensity in gene_marker_volumes.items(): result.at[region_name, marker.lower()] = compute_average_intensity( intensity["intensity"], region_mask, intensity["slices"] diff --git a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py index 6dd6659..389f2a7 100644 --- a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py +++ b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py @@ -114,51 +114,46 @@ def _check_region_counts_consistency( """ - def _check_descendants_consistency( - region_counts, hierarchy_info, region_name: str, id_set: Set[int], cell_type: str - ): - if region_counts.at[region_name, f"{cell_type}_standard_deviation"] == 0.0: - count = region_counts.at[region_name, cell_type] - descendants_names = hierarchy_info.loc[list(id_set), "brain_region"] - zero_std_mask = ( - region_counts.loc[descendants_names, f"{cell_type}_standard_deviation"] == 0.0 - ) - mask = zero_std_mask & ( - region_counts.loc[descendants_names, cell_type] > count + tolerance + def _check_descendants_consistency(region_name: str, id_set: Set[int], cell_type: str): + standard_deviation = f"{cell_type}_standard_deviation" + + if region_counts.at[region_name, standard_deviation] != 0.0: + return + + # count is certain, since standard_deviation is 0 + count = region_counts.at[region_name, cell_type] + descendants_names = hierarchy_info.loc[list(id_set), "brain_region"] + zero_std_mask = region_counts.loc[descendants_names, standard_deviation] == 0.0 + mask = zero_std_mask & (region_counts.loc[descendants_names, cell_type] > count + tolerance) + descendants_counts = region_counts.loc[descendants_names, cell_type][mask] + if not descendants_counts.empty: + raise AtlasDensitiesError( + f"Counts of {cell_type} cells in regions {list(descendants_names)} all exceed " + f"the count of its ancestor region {region_name} and each count is given " + f"for certain. The counts {descendants_counts} are all larger than " + f"{count}." ) - descendants_counts = region_counts.loc[descendants_names, cell_type][mask] - if not descendants_counts.empty: - raise AtlasDensitiesError( - f"Counts of {cell_type} cells in regions {list(descendants_names)} all exceed " - f"the count of its ancestor region {region_name} and each count is given " - f"for certain. The counts {descendants_counts} are all larger than " - f"{count}." - ) - names_with_certainty = descendants_names[zero_std_mask.to_list()].to_list() - leaf_names = [ - hierarchy_info.at[id_, "brain_region"] - for id_ in id_set - if len(hierarchy_info.at[id_, "descendant_id_set"]) == 1 - and hierarchy_info.at[id_, "brain_region"] in names_with_certainty - ] - leaves_count_sum = np.sum(region_counts.loc[leaf_names, cell_type]) - if leaves_count_sum > count + tolerance: - raise AtlasDensitiesError( - f"The sum of the counts of {cell_type} cells in leaf regions", - f" which are given with certainty, exceeds the count of the ancestor" - f" region {region_name}, also given with certainty: " - f"{leaves_count_sum} > {count}.", - ) - cell_types = get_cell_types(region_counts) - for region_name, id_set in zip( - hierarchy_info["brain_region"], hierarchy_info["descendant_id_set"] - ): - for cell_type in cell_types: - _check_descendants_consistency( - region_counts, hierarchy_info, region_name, id_set, cell_type + names_with_certainty = descendants_names[zero_std_mask.to_list()].to_list() + leaf_names = [ + hierarchy_info.at[id_, "brain_region"] + for id_ in id_set + if len(hierarchy_info.at[id_, "descendant_ids"]) == 1 + and hierarchy_info.at[id_, "brain_region"] in names_with_certainty + ] + leaves_count_sum = np.sum(region_counts.loc[leaf_names, cell_type]) + if leaves_count_sum > count + tolerance: + raise AtlasDensitiesError( + f"The sum of the counts of {cell_type} cells in leaf regions", + " which are given with certainty, exceeds the count of the ancestor" + f" region {region_name}, also given with certainty: " + f"{leaves_count_sum} > {count}.", ) + for _, region_name, id_set in hierarchy_info[["brain_region", "descendant_ids"]].itertuples(): + for cell_type in get_cell_types(region_counts): + _check_descendants_consistency(region_name, id_set, cell_type) + def _replace_inf_with_none(bounds: FloatArray) -> List[MinMaxPair]: """ @@ -273,7 +268,7 @@ def _zero_descendants(id_, cell_type, x_result, deltas, hierarchy_info): Zeroes the count of `cell_type` cells in every descendant regions of `id_`, including `id_`. """ - desc_ids = list(hierarchy_info.at[id_, "descendant_id_set"]) + desc_ids = list(hierarchy_info.at[id_, "descendant_ids"]) x_result.loc[desc_ids, cell_type] = 0.0 desc_names = hierarchy_info.loc[desc_ids, "brain_region"] deltas.loc[desc_names, cell_type] = 0.0 @@ -468,9 +463,7 @@ def check_constraint_3e( b_ub = [] variable_count = len(x_map) + len(deltas_map) cell_types = get_cell_types(region_counts) - for id_, region_name, set_ in zip( - hierarchy_info.index, hierarchy_info["brain_region"], hierarchy_info["descendant_id_set"] - ): + for id_, region_name, set_ in hierarchy_info[["brain_region", "descendant_ids"]].itertuples(): for cell_type in cell_types: if (region_name, cell_type) in deltas_map: constraint = np.zeros((variable_count,), dtype=float) @@ -589,8 +582,6 @@ def _compute_initial_cell_counts( L.info("Computing cell count estimates in every 3D region ...") region_counts = average_densities_to_cell_counts(average_densities, volumes) - # Detect cell counts inconsistency between a region and its descendants when - # estimates are deemed as certain. _check_region_counts_consistency(region_counts, hierarchy_info) L.info("Computing cell count estimates in atomic 3D region ...") @@ -634,7 +625,7 @@ def _check_variables_consistency( """ cell_count_tolerance = 1e-2 # absolute tolerance to rule out round-off errors for region_name, id_, id_set in zip( - deltas.index, hierarchy_info.index, hierarchy_info["descendant_id_set"] + deltas.index, hierarchy_info.index, hierarchy_info["descendant_ids"] ): for cell_type in cell_types: if np.isfinite(deltas.loc[region_name, cell_type]): diff --git a/atlas_densities/densities/measurement_to_density.py b/atlas_densities/densities/measurement_to_density.py index 65170e4..d300304 100644 --- a/atlas_densities/densities/measurement_to_density.py +++ b/atlas_densities/densities/measurement_to_density.py @@ -72,7 +72,7 @@ def compute_region_densities( The index is the sorted list of all region identifiers. """ densities = [] - descendants = hierarchy_info["descendant_id_set"] + descendants = hierarchy_info["descendant_ids"] for iset_ in tqdm(range(len(descendants))): mask = np.isin(annotation, list(descendants.iloc[iset_])) densities.append(np.sum(cell_density[mask]) / np.count_nonzero(mask)) @@ -245,7 +245,7 @@ def cell_count_per_slice_to_density( cell_counts_per_slice = measurements[mask_50um] hierarchy_info = hierarchy_info.set_index("brain_region") for index, row in cell_counts_per_slice.iterrows(): - id_set = hierarchy_info.loc[row["brain_region"], "descendant_id_set"] + id_set = hierarchy_info.loc[row["brain_region"], "descendant_ids"] average_slice_volume = voxel_volume * get_average_voxel_count_per_slice( id_set, annotation, thickness ) diff --git a/atlas_densities/densities/utils.py b/atlas_densities/densities/utils.py index c0879bb..117da48 100644 --- a/atlas_densities/densities/utils.py +++ b/atlas_densities/densities/utils.py @@ -458,7 +458,7 @@ def get_hierarchy_info( root: str = "Basic cell groups and regions", ) -> "pd.DataFrame": """ - Returns the name and the descendant_id_set of each region that can be found by `region_map`. + Returns the name and the descendant_ids of each region that can be found by `region_map`. Note: We assume that the hierarchy file has unique brain region names. @@ -471,24 +471,22 @@ def get_hierarchy_info( Returns: returns a dataframe with index a list of unique region ids and two columns (values are fake) - descendant_id_set brain_region + descendant_ids brain_region 1 {1, 3} "Cerebellum" 2 {2, 4, 10} "Isocortex" ... ... The index consists in the sorted list of the identifiers of every region recorded in - `region_map` under root. The column `descendant_id_set` holds for each region the set of + `region_map` under root. The column `descendant_ids` holds for each region the set of identifiers of the descendants including the region itself. `brain region` is the list of every region name. """ region_ids = list(region_map.find(root, attr="name", with_descendants=True)) region_ids.sort() - descendant_id_sets = [ - region_map.find(id_, attr="id", with_descendants=True) for id_ in region_ids - ] + descendant_ids = [region_map.find(id_, attr="id", with_descendants=True) for id_ in region_ids] region_names = [region_map.get(id_, attr="name") for id_ in region_ids] data_frame = pd.DataFrame( - {"brain_region": region_names, "descendant_id_set": descendant_id_sets}, + {"brain_region": region_names, "descendant_ids": descendant_ids}, index=region_ids, ) @@ -537,7 +535,7 @@ def compute_region_volumes( volumes = [] for id_ in hierarchy_info.index: - id_set = hierarchy_info.loc[id_, "descendant_id_set"] + id_set = hierarchy_info.loc[id_, "descendant_ids"] volume = result.loc[list(id_set), "id_volume"].sum() volumes.append(volume) result["volume"] = volumes diff --git a/tests/densities/test_inhibitory_neuron_density_optimization.py b/tests/densities/test_inhibitory_neuron_density_optimization.py index 4c6b86b..ba5276b 100644 --- a/tests/densities/test_inhibitory_neuron_density_optimization.py +++ b/tests/densities/test_inhibitory_neuron_density_optimization.py @@ -28,7 +28,9 @@ def test_resize_average_densities(): index=["A", "B"], ) hierarchy_info = pd.DataFrame( - {"brain_region": ["A", "B", "C"], "descendant_id_set": [{1}, {2}, {1, 2, 3}]}, + { + "brain_region": ["A", "B", "C"], + }, index=[1, 2, 3], ) @@ -64,7 +66,7 @@ def test_check_region_counts_consistency(): index=["A", "B", "C"], ) hierarchy_info = pd.DataFrame( - {"brain_region": ["A", "B", "C"], "descendant_id_set": [{1, 2, 3}, {2}, {3}]}, + {"brain_region": ["A", "B", "C"], "descendant_ids": [{1, 2, 3}, {2}, {3}]}, index=[1, 2, 3], ) diff --git a/tests/densities/test_optimization_initialization.py b/tests/densities/test_optimization_initialization.py index abf2540..f15bdc3 100644 --- a/tests/densities/test_optimization_initialization.py +++ b/tests/densities/test_optimization_initialization.py @@ -16,7 +16,7 @@ @pytest.fixture def counts_data_1(): hierarchy_info = pd.DataFrame( - {"brain_region": ["A", "B"], "descendant_id_set": [{1}, {2}]}, index=[1, 2] + {"brain_region": ["A", "B"], "descendant_ids": [{1}, {2}]}, index=[1, 2] ) region_counts = pd.DataFrame( { @@ -162,7 +162,7 @@ def test_set_known_values_3(counts_data_1): def counts_data_2(): hierarchy_info = pd.DataFrame( # A is now a parent region of B - {"brain_region": ["A", "B"], "descendant_id_set": [{1, 2}, {2}]}, + {"brain_region": ["A", "B"], "descendant_ids": [{1, 2}, {2}]}, index=[1, 2], ) region_counts = pd.DataFrame( @@ -335,7 +335,7 @@ def neuron_counts(): def counts_data_3(): hierarchy_info = pd.DataFrame( - {"brain_region": ["A", "B", "C"], "descendant_id_set": [{1, 2, 3}, {2}, {3}]}, + {"brain_region": ["A", "B", "C"], "descendant_ids": [{1, 2, 3}, {2}, {3}]}, index=[1, 2, 3], ) region_counts = pd.DataFrame( @@ -571,7 +571,7 @@ def test_create_bounds_2(): @pytest.fixture def aub_and_bub_data(): hierarchy_info = pd.DataFrame( - {"brain_region": ["A", "B", "C"], "descendant_id_set": [{1, 2, 3}, {2}, {3}]}, + {"brain_region": ["A", "B", "C"], "descendant_ids": [{1, 2, 3}, {2}, {3}]}, index=[1, 2, 3], ) x_map, deltas_map = expected_maps() diff --git a/tests/densities/test_utils.py b/tests/densities/test_utils.py index 51b9ba8..c4e6d4b 100644 --- a/tests/densities/test_utils.py +++ b/tests/densities/test_utils.py @@ -339,7 +339,7 @@ def get_hierarchy_info(): "Lobule II, Purkinje layer", "Lobule II, molecular layer", ], - "descendant_id_set": [ + "descendant_ids": [ {920, 976, 10708, 10709, 10710}, {976, 10708, 10709, 10710}, {10708},