Skip to content

Commit

Permalink
rename descendant_id_set -> descendant_ids
Browse files Browse the repository at this point in the history
start _check_descendants_consistency simplification
  • Loading branch information
mgeplf committed Mar 1, 2024
1 parent aeec00b commit 08e2570
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 70 deletions.
10 changes: 5 additions & 5 deletions atlas_densities/densities/fitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,11 @@ def fill_in_homogenous_regions(
inhibitory_mask = homogenous_regions["cell_type"] == "inhibitory"

for region_name in homogenous_regions[inhibitory_mask]["brain_region"]:
desc_id_set = hierarchy_info.at[region_name, "descendant_id_set"]
desc_id_set = hierarchy_info.at[region_name, "descendant_ids"]
id_mask = [id_ in desc_id_set for id_ in hierarchy_info["id"]]
for child_region_name in hierarchy_info[id_mask].index:
region_mask = np.isin(
annotation, list(hierarchy_info.at[child_region_name, "descendant_id_set"])
annotation, list(hierarchy_info.at[child_region_name, "descendant_ids"])
)
density = np.mean(neuron_density[region_mask])
densities.at[child_region_name, "inhibitory_neuron"] = density
Expand All @@ -165,7 +165,7 @@ def fill_in_homogenous_regions(
excitatory_mask = homogenous_regions["cell_type"] == "excitatory"

for region_name in homogenous_regions[excitatory_mask]["brain_region"]:
desc_id_set = hierarchy_info.at[region_name, "descendant_id_set"]
desc_id_set = hierarchy_info.at[region_name, "descendant_ids"]
id_mask = [id_ in desc_id_set for id_ in hierarchy_info["id"]]
for child_region_name in hierarchy_info[id_mask].index:
densities.at[child_region_name, "inhibitory_neuron"] = 0.0
Expand Down Expand Up @@ -232,7 +232,7 @@ def compute_average_intensities(
}
where each intensity array is of shape (W, H, D) and where the items of each slice
list range in [0, W - 1].
hierarchy_info: data frame with colums "descendant_id_set" (set[int]) and "brain_region"
hierarchy_info: data frame with colums "descendant_ids" (set[int]) and "brain_region"
(str) and whose index is a list of region ids.
See :fun:`atlas_densities.densities.utils.get_hierarchy_info`.
Expand Down Expand Up @@ -260,7 +260,7 @@ def compute_average_intensities(
region_count,
)
for region_name in tqdm(result.index):
region_mask = np.isin(annotation, list(hierarchy_info.at[region_name, "descendant_id_set"]))
region_mask = np.isin(annotation, list(hierarchy_info.at[region_name, "descendant_ids"]))
for marker, intensity in gene_marker_volumes.items():
result.at[region_name, marker.lower()] = compute_average_intensity(
intensity["intensity"], region_mask, intensity["slices"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,51 +114,46 @@ def _check_region_counts_consistency(
"""

def _check_descendants_consistency(
region_counts, hierarchy_info, region_name: str, id_set: Set[int], cell_type: str
):
if region_counts.at[region_name, f"{cell_type}_standard_deviation"] == 0.0:
count = region_counts.at[region_name, cell_type]
descendants_names = hierarchy_info.loc[list(id_set), "brain_region"]
zero_std_mask = (
region_counts.loc[descendants_names, f"{cell_type}_standard_deviation"] == 0.0
)
mask = zero_std_mask & (
region_counts.loc[descendants_names, cell_type] > count + tolerance
def _check_descendants_consistency(region_name: str, id_set: Set[int], cell_type: str):
standard_deviation = f"{cell_type}_standard_deviation"

if region_counts.at[region_name, standard_deviation] != 0.0:
return

# count is certain, since standard_deviation is 0
count = region_counts.at[region_name, cell_type]
descendants_names = hierarchy_info.loc[list(id_set), "brain_region"]
zero_std_mask = region_counts.loc[descendants_names, standard_deviation] == 0.0
mask = zero_std_mask & (region_counts.loc[descendants_names, cell_type] > count + tolerance)
descendants_counts = region_counts.loc[descendants_names, cell_type][mask]
if not descendants_counts.empty:
raise AtlasDensitiesError(
f"Counts of {cell_type} cells in regions {list(descendants_names)} all exceed "
f"the count of its ancestor region {region_name} and each count is given "
f"for certain. The counts {descendants_counts} are all larger than "
f"{count}."
)
descendants_counts = region_counts.loc[descendants_names, cell_type][mask]
if not descendants_counts.empty:
raise AtlasDensitiesError(
f"Counts of {cell_type} cells in regions {list(descendants_names)} all exceed "
f"the count of its ancestor region {region_name} and each count is given "
f"for certain. The counts {descendants_counts} are all larger than "
f"{count}."
)
names_with_certainty = descendants_names[zero_std_mask.to_list()].to_list()
leaf_names = [
hierarchy_info.at[id_, "brain_region"]
for id_ in id_set
if len(hierarchy_info.at[id_, "descendant_id_set"]) == 1
and hierarchy_info.at[id_, "brain_region"] in names_with_certainty
]
leaves_count_sum = np.sum(region_counts.loc[leaf_names, cell_type])
if leaves_count_sum > count + tolerance:
raise AtlasDensitiesError(
f"The sum of the counts of {cell_type} cells in leaf regions",
f" which are given with certainty, exceeds the count of the ancestor"
f" region {region_name}, also given with certainty: "
f"{leaves_count_sum} > {count}.",
)

cell_types = get_cell_types(region_counts)
for region_name, id_set in zip(
hierarchy_info["brain_region"], hierarchy_info["descendant_id_set"]
):
for cell_type in cell_types:
_check_descendants_consistency(
region_counts, hierarchy_info, region_name, id_set, cell_type
names_with_certainty = descendants_names[zero_std_mask.to_list()].to_list()
leaf_names = [
hierarchy_info.at[id_, "brain_region"]
for id_ in id_set
if len(hierarchy_info.at[id_, "descendant_ids"]) == 1
and hierarchy_info.at[id_, "brain_region"] in names_with_certainty
]
leaves_count_sum = np.sum(region_counts.loc[leaf_names, cell_type])
if leaves_count_sum > count + tolerance:
raise AtlasDensitiesError(
f"The sum of the counts of {cell_type} cells in leaf regions",
" which are given with certainty, exceeds the count of the ancestor"
f" region {region_name}, also given with certainty: "
f"{leaves_count_sum} > {count}.",
)

for _, region_name, id_set in hierarchy_info[["brain_region", "descendant_ids"]].itertuples():
for cell_type in get_cell_types(region_counts):
_check_descendants_consistency(region_name, id_set, cell_type)


def _replace_inf_with_none(bounds: FloatArray) -> List[MinMaxPair]:
"""
Expand Down Expand Up @@ -273,7 +268,7 @@ def _zero_descendants(id_, cell_type, x_result, deltas, hierarchy_info):
Zeroes the count of `cell_type` cells in every descendant regions of `id_`,
including `id_`.
"""
desc_ids = list(hierarchy_info.at[id_, "descendant_id_set"])
desc_ids = list(hierarchy_info.at[id_, "descendant_ids"])
x_result.loc[desc_ids, cell_type] = 0.0
desc_names = hierarchy_info.loc[desc_ids, "brain_region"]
deltas.loc[desc_names, cell_type] = 0.0
Expand Down Expand Up @@ -468,9 +463,7 @@ def check_constraint_3e(
b_ub = []
variable_count = len(x_map) + len(deltas_map)
cell_types = get_cell_types(region_counts)
for id_, region_name, set_ in zip(
hierarchy_info.index, hierarchy_info["brain_region"], hierarchy_info["descendant_id_set"]
):
for id_, region_name, set_ in hierarchy_info[["brain_region", "descendant_ids"]].itertuples():
for cell_type in cell_types:
if (region_name, cell_type) in deltas_map:
constraint = np.zeros((variable_count,), dtype=float)
Expand Down Expand Up @@ -589,8 +582,6 @@ def _compute_initial_cell_counts(
L.info("Computing cell count estimates in every 3D region ...")
region_counts = average_densities_to_cell_counts(average_densities, volumes)

# Detect cell counts inconsistency between a region and its descendants when
# estimates are deemed as certain.
_check_region_counts_consistency(region_counts, hierarchy_info)

L.info("Computing cell count estimates in atomic 3D region ...")
Expand Down Expand Up @@ -634,7 +625,7 @@ def _check_variables_consistency(
"""
cell_count_tolerance = 1e-2 # absolute tolerance to rule out round-off errors
for region_name, id_, id_set in zip(
deltas.index, hierarchy_info.index, hierarchy_info["descendant_id_set"]
deltas.index, hierarchy_info.index, hierarchy_info["descendant_ids"]
):
for cell_type in cell_types:
if np.isfinite(deltas.loc[region_name, cell_type]):
Expand Down
4 changes: 2 additions & 2 deletions atlas_densities/densities/measurement_to_density.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def compute_region_densities(
The index is the sorted list of all region identifiers.
"""
densities = []
descendants = hierarchy_info["descendant_id_set"]
descendants = hierarchy_info["descendant_ids"]
for iset_ in tqdm(range(len(descendants))):
mask = np.isin(annotation, list(descendants.iloc[iset_]))
densities.append(np.sum(cell_density[mask]) / np.count_nonzero(mask))
Expand Down Expand Up @@ -245,7 +245,7 @@ def cell_count_per_slice_to_density(
cell_counts_per_slice = measurements[mask_50um]
hierarchy_info = hierarchy_info.set_index("brain_region")
for index, row in cell_counts_per_slice.iterrows():
id_set = hierarchy_info.loc[row["brain_region"], "descendant_id_set"]
id_set = hierarchy_info.loc[row["brain_region"], "descendant_ids"]
average_slice_volume = voxel_volume * get_average_voxel_count_per_slice(
id_set, annotation, thickness
)
Expand Down
14 changes: 6 additions & 8 deletions atlas_densities/densities/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ def get_hierarchy_info(
root: str = "Basic cell groups and regions",
) -> "pd.DataFrame":
"""
Returns the name and the descendant_id_set of each region that can be found by `region_map`.
Returns the name and the descendant_ids of each region that can be found by `region_map`.
Note: We assume that the hierarchy file has unique brain region names.
Expand All @@ -471,24 +471,22 @@ def get_hierarchy_info(
Returns:
returns a dataframe with index a list of unique region ids and two columns
(values are fake)
descendant_id_set brain_region
descendant_ids brain_region
1 {1, 3} "Cerebellum"
2 {2, 4, 10} "Isocortex"
... ...
The index consists in the sorted list of the identifiers of every region recorded in
`region_map` under root. The column `descendant_id_set` holds for each region the set of
`region_map` under root. The column `descendant_ids` holds for each region the set of
identifiers of the descendants including the region itself. `brain region` is the list of
every region name.
"""
region_ids = list(region_map.find(root, attr="name", with_descendants=True))
region_ids.sort()
descendant_id_sets = [
region_map.find(id_, attr="id", with_descendants=True) for id_ in region_ids
]
descendant_ids = [region_map.find(id_, attr="id", with_descendants=True) for id_ in region_ids]
region_names = [region_map.get(id_, attr="name") for id_ in region_ids]
data_frame = pd.DataFrame(
{"brain_region": region_names, "descendant_id_set": descendant_id_sets},
{"brain_region": region_names, "descendant_ids": descendant_ids},
index=region_ids,
)

Expand Down Expand Up @@ -537,7 +535,7 @@ def compute_region_volumes(

volumes = []
for id_ in hierarchy_info.index:
id_set = hierarchy_info.loc[id_, "descendant_id_set"]
id_set = hierarchy_info.loc[id_, "descendant_ids"]
volume = result.loc[list(id_set), "id_volume"].sum()
volumes.append(volume)
result["volume"] = volumes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ def test_resize_average_densities():
index=["A", "B"],
)
hierarchy_info = pd.DataFrame(
{"brain_region": ["A", "B", "C"], "descendant_id_set": [{1}, {2}, {1, 2, 3}]},
{
"brain_region": ["A", "B", "C"],
},
index=[1, 2, 3],
)

Expand Down Expand Up @@ -64,7 +66,7 @@ def test_check_region_counts_consistency():
index=["A", "B", "C"],
)
hierarchy_info = pd.DataFrame(
{"brain_region": ["A", "B", "C"], "descendant_id_set": [{1, 2, 3}, {2}, {3}]},
{"brain_region": ["A", "B", "C"], "descendant_ids": [{1, 2, 3}, {2}, {3}]},
index=[1, 2, 3],
)

Expand Down
8 changes: 4 additions & 4 deletions tests/densities/test_optimization_initialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
@pytest.fixture
def counts_data_1():
hierarchy_info = pd.DataFrame(
{"brain_region": ["A", "B"], "descendant_id_set": [{1}, {2}]}, index=[1, 2]
{"brain_region": ["A", "B"], "descendant_ids": [{1}, {2}]}, index=[1, 2]
)
region_counts = pd.DataFrame(
{
Expand Down Expand Up @@ -162,7 +162,7 @@ def test_set_known_values_3(counts_data_1):
def counts_data_2():
hierarchy_info = pd.DataFrame(
# A is now a parent region of B
{"brain_region": ["A", "B"], "descendant_id_set": [{1, 2}, {2}]},
{"brain_region": ["A", "B"], "descendant_ids": [{1, 2}, {2}]},
index=[1, 2],
)
region_counts = pd.DataFrame(
Expand Down Expand Up @@ -335,7 +335,7 @@ def neuron_counts():

def counts_data_3():
hierarchy_info = pd.DataFrame(
{"brain_region": ["A", "B", "C"], "descendant_id_set": [{1, 2, 3}, {2}, {3}]},
{"brain_region": ["A", "B", "C"], "descendant_ids": [{1, 2, 3}, {2}, {3}]},
index=[1, 2, 3],
)
region_counts = pd.DataFrame(
Expand Down Expand Up @@ -571,7 +571,7 @@ def test_create_bounds_2():
@pytest.fixture
def aub_and_bub_data():
hierarchy_info = pd.DataFrame(
{"brain_region": ["A", "B", "C"], "descendant_id_set": [{1, 2, 3}, {2}, {3}]},
{"brain_region": ["A", "B", "C"], "descendant_ids": [{1, 2, 3}, {2}, {3}]},
index=[1, 2, 3],
)
x_map, deltas_map = expected_maps()
Expand Down
2 changes: 1 addition & 1 deletion tests/densities/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def get_hierarchy_info():
"Lobule II, Purkinje layer",
"Lobule II, molecular layer",
],
"descendant_id_set": [
"descendant_ids": [
{920, 976, 10708, 10709, 10710},
{976, 10708, 10709, 10710},
{10708},
Expand Down

0 comments on commit 08e2570

Please sign in to comment.