rename descendant_id_set -> descendant_ids

start _check_descendants_consistency simplification
BlueBrain · Mar 1, 2024 · 08e2570 · 08e2570
1 parent aeec00b
commit 08e2570
Show file tree

Hide file tree

Showing 7 changed files with 61 additions and 70 deletions.
diff --git a/atlas_densities/densities/fitting.py b/atlas_densities/densities/fitting.py
@@ -147,11 +147,11 @@ def fill_in_homogenous_regions(
     inhibitory_mask = homogenous_regions["cell_type"] == "inhibitory"
 
     for region_name in homogenous_regions[inhibitory_mask]["brain_region"]:
-        desc_id_set = hierarchy_info.at[region_name, "descendant_id_set"]
+        desc_id_set = hierarchy_info.at[region_name, "descendant_ids"]
         id_mask = [id_ in desc_id_set for id_ in hierarchy_info["id"]]
         for child_region_name in hierarchy_info[id_mask].index:
             region_mask = np.isin(
-                annotation, list(hierarchy_info.at[child_region_name, "descendant_id_set"])
+                annotation, list(hierarchy_info.at[child_region_name, "descendant_ids"])
             )
             density = np.mean(neuron_density[region_mask])
             densities.at[child_region_name, "inhibitory_neuron"] = density
@@ -165,7 +165,7 @@ def fill_in_homogenous_regions(
     excitatory_mask = homogenous_regions["cell_type"] == "excitatory"
 
     for region_name in homogenous_regions[excitatory_mask]["brain_region"]:
-        desc_id_set = hierarchy_info.at[region_name, "descendant_id_set"]
+        desc_id_set = hierarchy_info.at[region_name, "descendant_ids"]
         id_mask = [id_ in desc_id_set for id_ in hierarchy_info["id"]]
         for child_region_name in hierarchy_info[id_mask].index:
             densities.at[child_region_name, "inhibitory_neuron"] = 0.0
@@ -232,7 +232,7 @@ def compute_average_intensities(
                 }
             where each intensity array is of shape (W, H, D) and where the items of each slice
             list range in [0, W - 1].
-        hierarchy_info: data frame with colums "descendant_id_set" (set[int]) and "brain_region"
+        hierarchy_info: data frame with colums "descendant_ids" (set[int]) and "brain_region"
             (str) and whose index is a list of region ids.
             See :fun:`atlas_densities.densities.utils.get_hierarchy_info`.
 
@@ -260,7 +260,7 @@ def compute_average_intensities(
         region_count,
     )
     for region_name in tqdm(result.index):
-        region_mask = np.isin(annotation, list(hierarchy_info.at[region_name, "descendant_id_set"]))
+        region_mask = np.isin(annotation, list(hierarchy_info.at[region_name, "descendant_ids"]))
         for marker, intensity in gene_marker_volumes.items():
             result.at[region_name, marker.lower()] = compute_average_intensity(
                 intensity["intensity"], region_mask, intensity["slices"]

diff --git a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py
@@ -114,51 +114,46 @@ def _check_region_counts_consistency(
 
     """
 
-    def _check_descendants_consistency(
-        region_counts, hierarchy_info, region_name: str, id_set: Set[int], cell_type: str
-    ):
-        if region_counts.at[region_name, f"{cell_type}_standard_deviation"] == 0.0:
-            count = region_counts.at[region_name, cell_type]
-            descendants_names = hierarchy_info.loc[list(id_set), "brain_region"]
-            zero_std_mask = (
-                region_counts.loc[descendants_names, f"{cell_type}_standard_deviation"] == 0.0
-            )
-            mask = zero_std_mask & (
-                region_counts.loc[descendants_names, cell_type] > count + tolerance
+    def _check_descendants_consistency(region_name: str, id_set: Set[int], cell_type: str):
+        standard_deviation = f"{cell_type}_standard_deviation"
+
+        if region_counts.at[region_name, standard_deviation] != 0.0:
+            return
+
+        # count is certain, since standard_deviation is 0
+        count = region_counts.at[region_name, cell_type]
+        descendants_names = hierarchy_info.loc[list(id_set), "brain_region"]
+        zero_std_mask = region_counts.loc[descendants_names, standard_deviation] == 0.0
+        mask = zero_std_mask & (region_counts.loc[descendants_names, cell_type] > count + tolerance)
+        descendants_counts = region_counts.loc[descendants_names, cell_type][mask]
+        if not descendants_counts.empty:
+            raise AtlasDensitiesError(
+                f"Counts of {cell_type} cells in regions {list(descendants_names)} all exceed "
+                f"the count of its ancestor region {region_name} and each count is given "
+                f"for certain. The counts {descendants_counts} are all larger than "
+                f"{count}."
             )
-            descendants_counts = region_counts.loc[descendants_names, cell_type][mask]
-            if not descendants_counts.empty:
-                raise AtlasDensitiesError(
-                    f"Counts of {cell_type} cells in regions {list(descendants_names)} all exceed "
-                    f"the count of its ancestor region {region_name} and each count is given "
-                    f"for certain. The counts {descendants_counts} are all larger than "
-                    f"{count}."
-                )
-            names_with_certainty = descendants_names[zero_std_mask.to_list()].to_list()
-            leaf_names = [
-                hierarchy_info.at[id_, "brain_region"]
-                for id_ in id_set
-                if len(hierarchy_info.at[id_, "descendant_id_set"]) == 1
-                and hierarchy_info.at[id_, "brain_region"] in names_with_certainty
-            ]
-            leaves_count_sum = np.sum(region_counts.loc[leaf_names, cell_type])
-            if leaves_count_sum > count + tolerance:
-                raise AtlasDensitiesError(
-                    f"The sum of the counts of {cell_type} cells in leaf regions",
-                    f" which are given with certainty, exceeds the count of the ancestor"
-                    f" region {region_name}, also given with certainty: "
-                    f"{leaves_count_sum} > {count}.",
-                )
 
-    cell_types = get_cell_types(region_counts)
-    for region_name, id_set in zip(
-        hierarchy_info["brain_region"], hierarchy_info["descendant_id_set"]
-    ):
-        for cell_type in cell_types:
-            _check_descendants_consistency(
-                region_counts, hierarchy_info, region_name, id_set, cell_type
+        names_with_certainty = descendants_names[zero_std_mask.to_list()].to_list()
+        leaf_names = [
+            hierarchy_info.at[id_, "brain_region"]
+            for id_ in id_set
+            if len(hierarchy_info.at[id_, "descendant_ids"]) == 1
+            and hierarchy_info.at[id_, "brain_region"] in names_with_certainty
+        ]
+        leaves_count_sum = np.sum(region_counts.loc[leaf_names, cell_type])
+        if leaves_count_sum > count + tolerance:
+            raise AtlasDensitiesError(
+                f"The sum of the counts of {cell_type} cells in leaf regions",
+                " which are given with certainty, exceeds the count of the ancestor"
+                f" region {region_name}, also given with certainty: "
+                f"{leaves_count_sum} > {count}.",
             )
 
+    for _, region_name, id_set in hierarchy_info[["brain_region", "descendant_ids"]].itertuples():
+        for cell_type in get_cell_types(region_counts):
+            _check_descendants_consistency(region_name, id_set, cell_type)
+
 
 def _replace_inf_with_none(bounds: FloatArray) -> List[MinMaxPair]:
     """
@@ -273,7 +268,7 @@ def _zero_descendants(id_, cell_type, x_result, deltas, hierarchy_info):
         Zeroes the count of `cell_type` cells in every descendant regions of `id_`,
         including `id_`.
         """
-        desc_ids = list(hierarchy_info.at[id_, "descendant_id_set"])
+        desc_ids = list(hierarchy_info.at[id_, "descendant_ids"])
         x_result.loc[desc_ids, cell_type] = 0.0
         desc_names = hierarchy_info.loc[desc_ids, "brain_region"]
         deltas.loc[desc_names, cell_type] = 0.0
@@ -468,9 +463,7 @@ def check_constraint_3e(
     b_ub = []
     variable_count = len(x_map) + len(deltas_map)
     cell_types = get_cell_types(region_counts)
-    for id_, region_name, set_ in zip(
-        hierarchy_info.index, hierarchy_info["brain_region"], hierarchy_info["descendant_id_set"]
-    ):
+    for id_, region_name, set_ in hierarchy_info[["brain_region", "descendant_ids"]].itertuples():
         for cell_type in cell_types:
             if (region_name, cell_type) in deltas_map:
                 constraint = np.zeros((variable_count,), dtype=float)
@@ -589,8 +582,6 @@ def _compute_initial_cell_counts(
     L.info("Computing cell count estimates in every 3D region ...")
     region_counts = average_densities_to_cell_counts(average_densities, volumes)
 
-    # Detect cell counts inconsistency between a region and its descendants when
-    # estimates are deemed as certain.
     _check_region_counts_consistency(region_counts, hierarchy_info)
 
     L.info("Computing cell count estimates in atomic 3D region ...")
@@ -634,7 +625,7 @@ def _check_variables_consistency(
     """
     cell_count_tolerance = 1e-2  # absolute tolerance to rule out round-off errors
     for region_name, id_, id_set in zip(
-        deltas.index, hierarchy_info.index, hierarchy_info["descendant_id_set"]
+        deltas.index, hierarchy_info.index, hierarchy_info["descendant_ids"]
     ):
         for cell_type in cell_types:
             if np.isfinite(deltas.loc[region_name, cell_type]):

diff --git a/atlas_densities/densities/measurement_to_density.py b/atlas_densities/densities/measurement_to_density.py
@@ -72,7 +72,7 @@ def compute_region_densities(
         The index is the sorted list of all region identifiers.
     """
     densities = []
-    descendants = hierarchy_info["descendant_id_set"]
+    descendants = hierarchy_info["descendant_ids"]
     for iset_ in tqdm(range(len(descendants))):
         mask = np.isin(annotation, list(descendants.iloc[iset_]))
         densities.append(np.sum(cell_density[mask]) / np.count_nonzero(mask))
@@ -245,7 +245,7 @@ def cell_count_per_slice_to_density(
     cell_counts_per_slice = measurements[mask_50um]
     hierarchy_info = hierarchy_info.set_index("brain_region")
     for index, row in cell_counts_per_slice.iterrows():
-        id_set = hierarchy_info.loc[row["brain_region"], "descendant_id_set"]
+        id_set = hierarchy_info.loc[row["brain_region"], "descendant_ids"]
         average_slice_volume = voxel_volume * get_average_voxel_count_per_slice(
             id_set, annotation, thickness
         )

diff --git a/atlas_densities/densities/utils.py b/atlas_densities/densities/utils.py
@@ -458,7 +458,7 @@ def get_hierarchy_info(
     root: str = "Basic cell groups and regions",
 ) -> "pd.DataFrame":
     """
-    Returns the name and the descendant_id_set of each region that can be found by `region_map`.
+    Returns the name and the descendant_ids of each region that can be found by `region_map`.
 
     Note: We assume that the hierarchy file has unique brain region names.
 
@@ -471,24 +471,22 @@ def get_hierarchy_info(
     Returns:
         returns a dataframe with index a list of unique region ids and two columns
         (values are fake)
-                 descendant_id_set    brain_region
+                 descendant_ids    brain_region
             1    {1, 3}               "Cerebellum"
             2    {2, 4, 10}           "Isocortex"
                  ...             ...
         The index consists in the sorted list of the identifiers of every region recorded in
-        `region_map` under root. The column `descendant_id_set` holds for each region the set of
+        `region_map` under root. The column `descendant_ids` holds for each region the set of
         identifiers of the descendants including the region itself. `brain region` is the list of
         every region name.
 
     """
     region_ids = list(region_map.find(root, attr="name", with_descendants=True))
     region_ids.sort()
-    descendant_id_sets = [
-        region_map.find(id_, attr="id", with_descendants=True) for id_ in region_ids
-    ]
+    descendant_ids = [region_map.find(id_, attr="id", with_descendants=True) for id_ in region_ids]
     region_names = [region_map.get(id_, attr="name") for id_ in region_ids]
     data_frame = pd.DataFrame(
-        {"brain_region": region_names, "descendant_id_set": descendant_id_sets},
+        {"brain_region": region_names, "descendant_ids": descendant_ids},
         index=region_ids,
     )
 
@@ -537,7 +535,7 @@ def compute_region_volumes(
 
     volumes = []
     for id_ in hierarchy_info.index:
-        id_set = hierarchy_info.loc[id_, "descendant_id_set"]
+        id_set = hierarchy_info.loc[id_, "descendant_ids"]
         volume = result.loc[list(id_set), "id_volume"].sum()
         volumes.append(volume)
     result["volume"] = volumes

diff --git a/tests/densities/test_inhibitory_neuron_density_optimization.py b/tests/densities/test_inhibitory_neuron_density_optimization.py
@@ -28,7 +28,9 @@ def test_resize_average_densities():
         index=["A", "B"],
     )
     hierarchy_info = pd.DataFrame(
-        {"brain_region": ["A", "B", "C"], "descendant_id_set": [{1}, {2}, {1, 2, 3}]},
+        {
+            "brain_region": ["A", "B", "C"],
+        },
         index=[1, 2, 3],
     )
 
@@ -64,7 +66,7 @@ def test_check_region_counts_consistency():
         index=["A", "B", "C"],
     )
     hierarchy_info = pd.DataFrame(
-        {"brain_region": ["A", "B", "C"], "descendant_id_set": [{1, 2, 3}, {2}, {3}]},
+        {"brain_region": ["A", "B", "C"], "descendant_ids": [{1, 2, 3}, {2}, {3}]},
         index=[1, 2, 3],
     )
 

diff --git a/tests/densities/test_optimization_initialization.py b/tests/densities/test_optimization_initialization.py
@@ -16,7 +16,7 @@
 @pytest.fixture
 def counts_data_1():
     hierarchy_info = pd.DataFrame(
-        {"brain_region": ["A", "B"], "descendant_id_set": [{1}, {2}]}, index=[1, 2]
+        {"brain_region": ["A", "B"], "descendant_ids": [{1}, {2}]}, index=[1, 2]
     )
     region_counts = pd.DataFrame(
         {
@@ -162,7 +162,7 @@ def test_set_known_values_3(counts_data_1):
 def counts_data_2():
     hierarchy_info = pd.DataFrame(
         # A is now a parent region of B
-        {"brain_region": ["A", "B"], "descendant_id_set": [{1, 2}, {2}]},
+        {"brain_region": ["A", "B"], "descendant_ids": [{1, 2}, {2}]},
         index=[1, 2],
     )
     region_counts = pd.DataFrame(
@@ -335,7 +335,7 @@ def neuron_counts():
 
 def counts_data_3():
     hierarchy_info = pd.DataFrame(
-        {"brain_region": ["A", "B", "C"], "descendant_id_set": [{1, 2, 3}, {2}, {3}]},
+        {"brain_region": ["A", "B", "C"], "descendant_ids": [{1, 2, 3}, {2}, {3}]},
         index=[1, 2, 3],
     )
     region_counts = pd.DataFrame(
@@ -571,7 +571,7 @@ def test_create_bounds_2():
 @pytest.fixture
 def aub_and_bub_data():
     hierarchy_info = pd.DataFrame(
-        {"brain_region": ["A", "B", "C"], "descendant_id_set": [{1, 2, 3}, {2}, {3}]},
+        {"brain_region": ["A", "B", "C"], "descendant_ids": [{1, 2, 3}, {2}, {3}]},
         index=[1, 2, 3],
     )
     x_map, deltas_map = expected_maps()

diff --git a/tests/densities/test_utils.py b/tests/densities/test_utils.py
@@ -339,7 +339,7 @@ def get_hierarchy_info():
                 "Lobule II, Purkinje layer",
                 "Lobule II, molecular layer",
             ],
-            "descendant_id_set": [
+            "descendant_ids": [
                 {920, 976, 10708, 10709, 10710},
                 {976, 10708, 10709, 10710},
                 {10708},