Skip to content

Commit

Permalink
Faster compute average intensities (#73)
Browse files Browse the repository at this point in the history
Most of the speed up comes from not having to repeatedly create masks:

old: 1920s
new: 10s, so about 190 times faster

Small change in computed densities:
    (Pdb++) np.abs(result.to_numpy() - old.to_numpy()).max()
        9.518734625513225e-08
Test code:
```
from atlas_densities.densities import fitting
from voxcell import VoxelData, RegionMap
from atlas_densities.densities import utils

annotation = VoxelData.load_nrrd('input-data/annotation_ccfv2_l23split_barrelsplit_validated.nrrd')

gene_voxeldata = {
    'gad67': VoxelData.load_nrrd('input-data/gene_gad67_correctednissl.nrrd'),
    'pv': VoxelData.load_nrrd('input-data/gene_pv_correctednissl.nrrd'),
    'sst': VoxelData.load_nrrd('input-data/gene_sst_correctednissl.nrrd'),
    'vip': VoxelData.load_nrrd('input-data/gene_vip_correctednissl.nrrd'),
}
gids = { 'gad67': '479', 'pv': '868', 'sst': '1001', 'vip': '77371835', }
slices = utils.load_json('input-data/realigned_slices.json')
gene_marker_volumes = {
    gene: {
        "intensity": gene_data.raw,
        "slices": slices[gids[gene]],  # list of integer slice indices
    }
    for (gene, gene_data) in gene_voxeldata.items()
}
region_map = RegionMap.load_json('input-data/hierarchy_ccfv2_l23split_barrelsplit.json')
hierarchy_info = utils.get_hierarchy_info(region_map, root='root')

res = fitting.compute_average_intensities(annotation.raw, gene_marker_volumes, hierarchy_info, region_map)
```

---------

Co-authored-by: Eleftherios Zisis <[email protected]>
  • Loading branch information
mgeplf and eleftherioszisis authored Mar 26, 2024
1 parent 5b08942 commit fba1e3f
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 60 deletions.
138 changes: 116 additions & 22 deletions atlas_densities/densities/fitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,19 @@
import itertools as it
import logging
import warnings
from typing import TYPE_CHECKING, Dict, List, Optional, Union
from typing import Dict, List, Optional, Union

import numpy as np
import pandas as pd
from atlas_commons.typing import AnnotationT, BoolArray, FloatArray
from scipy.optimize import curve_fit
from tqdm import tqdm
from voxcell import RegionMap, voxel_data

from atlas_densities.densities import utils
from atlas_densities.densities.measurement_to_density import remove_unknown_regions
from atlas_densities.exceptions import AtlasDensitiesError, AtlasDensitiesWarning

if TYPE_CHECKING: # pragma: no cover
from voxcell import RegionMap

L = logging.getLogger(__name__)

MarkerVolumes = Dict[str, Dict[str, Union[FloatArray, List[int]]]]
Expand Down Expand Up @@ -214,10 +212,105 @@ def compute_average_intensity(
return 0.0


def _add_depths(region_map_df):
"""Rdd a `depth` column to the `region_map_df` with how deep it is within the hierarchy."""
region_map_df["depth"] = 0
parent_ids = [
-1,
]
depth = 1
while parent_ids:
children_ids = region_map_df[np.isin(region_map_df["parent_id"], list(parent_ids))].index
region_map_df.loc[children_ids, "depth"] = depth
parent_ids = set(children_ids)
depth += 1


def _fill_densities(region_map, region_map_df, df):
"""Fill all `voxel_count` and `density` for each region within `def`.
This assumes the leaf nodes (ie: the deapest nodes in region_map) have had
their values filled in, and thus each of their parents can recursively be
filled in.
"""
order_idx = np.argsort(region_map_df.depth.to_numpy())

ids = region_map_df.index[order_idx[::-1]]
for id_ in ids:
if id_ not in region_map._children: # pylint: disable=protected-access
continue

children = region_map._children[id_] # pylint: disable=protected-access

if not children:
continue

voxel_count = df.loc[children, "voxel_count"]
count = voxel_count.sum()
if count:
df.loc[id_, "voxel_count"] = count
df.loc[id_, "density"] = np.average(df.loc[children, "density"], weights=voxel_count)


def _apply_density_slices(gene_marker_volumes):
"""For each marker in `gene_marker_volumes`, apply the slice mask to the indensities volume."""
ret = {}
for marker, intensity in gene_marker_volumes.items():
intensity, slices = intensity["intensity"], intensity["slices"]

if slices is None:
mask = np.ones_like(intensity, dtype=bool)
else:
mask = np.zeros_like(intensity, dtype=bool)
slices_ = [slice_ for slice_ in slices if 0 <= slice_ < mask.shape[0]]
mask[slices_] = True

ret[marker] = {"intensity": intensity, "slices": slices, "mask": mask}

return ret


def _compute_per_marker_intensity(annotation, gene_marker_volumes):
"""Compute the average intensity for `id_`, for all makers in `gene_marker_volumes`"""
vtiv = voxel_data.ValueToIndexVoxels(annotation)

count_and_density = []
for id_ in vtiv.values:
if id_ == 0:
continue

voxel_ids = vtiv.value_to_1d_indices(id_)

res = []
for marker, intensity in gene_marker_volumes.items():
mask_voxels = vtiv.ravel(intensity["mask"])[voxel_ids]

count = mask_voxels.sum()

if count <= 0:
continue

mean_density = vtiv.ravel(intensity["intensity"])[voxel_ids][mask_voxels].sum() / count

if mean_density == 0.0:
L.warning("Mean density for id=%s and marker=%s", id_, marker)

count_and_density.append((marker.lower(), id_, count, mean_density))

res = (
pd.DataFrame(count_and_density, columns=["marker", "id", "voxel_count", "density"])
.set_index("id")
.pivot(columns="marker")
.swaplevel(axis=1)
)
return res


def compute_average_intensities(
annotation: AnnotationT,
gene_marker_volumes: MarkerVolumes,
hierarchy_info: pd.DataFrame,
region_map,
) -> pd.DataFrame:
"""
Compute the average marker intensity of every region in `hierarchy_info` for every marker
Expand Down Expand Up @@ -249,27 +342,27 @@ def compute_average_intensities(
The index of the data frame is the list of regions `hierarchy_info["brain_region"]`.
The column labels are the keys of `gene_marker_volumes` in lower case.
"""
region_count = len(hierarchy_info["brain_region"])
data = np.full((region_count, len(gene_marker_volumes)), np.nan)
hierarchy_info = hierarchy_info.set_index("brain_region")
gene_marker_volumes = _apply_density_slices(gene_marker_volumes)

intensity = _compute_per_marker_intensity(annotation, gene_marker_volumes)

region_map_df = region_map.as_dataframe()
_add_depths(region_map_df)

result = pd.DataFrame(
data=data,
data=np.nan,
index=hierarchy_info.index,
columns=[marker_name.lower() for marker_name in gene_marker_volumes.keys()],
columns=[marker_name.lower() for marker_name in gene_marker_volumes],
)
result["brain_region"] = region_map_df.loc[result.index].name

L.info(
"Computing average intensities for %d markers in %d regions ...",
len(gene_marker_volumes),
region_count,
)
for region_name in tqdm(result.index):
region_mask = np.isin(annotation, list(hierarchy_info.at[region_name, "descendant_ids"]))
for marker, intensity in gene_marker_volumes.items():
result.at[region_name, marker.lower()] = compute_average_intensity(
intensity["intensity"], region_mask, intensity["slices"]
)
for marker in gene_marker_volumes:
df = pd.DataFrame(data=0.0, index=result.index, columns=["voxel_count", "density"])
df.update(intensity[marker.lower()])
_fill_densities(region_map, region_map_df, df)
result[marker.lower()] = df["density"]

result = result.set_index("brain_region")
return result


Expand Down Expand Up @@ -580,7 +673,7 @@ def _check_average_densities_sanity(average_densities: pd.DataFrame) -> None:
)


def _get_group_names(region_map: "RegionMap", group_ids_config: dict) -> dict[str, set[str]]:
def _get_group_names(region_map: RegionMap, group_ids_config: dict) -> dict[str, set[str]]:
"""
Get AIBS names for regions in several region groups of interest.
Expand Down Expand Up @@ -628,7 +721,7 @@ def _get_group_region_names(groups):


def linear_fitting( # pylint: disable=too-many-arguments
region_map: "RegionMap",
region_map: RegionMap,
annotation: AnnotationT,
neuron_density: FloatArray,
gene_marker_volumes: MarkerVolumes,
Expand Down Expand Up @@ -735,6 +828,7 @@ def linear_fitting( # pylint: disable=too-many-arguments
annotation,
gene_marker_volumes,
hierarchy_info.drop(hierarchy_info.index[indexes]),
region_map,
)

L.info("Computing fitting coefficients ...")
Expand Down
66 changes: 32 additions & 34 deletions tests/app/test_mtype_densities.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,54 +87,52 @@ def test_mtype_densities_from_profiles(tmp_path):
assert "neuron density file" in str(result.exception)


def get_result_from_probablity_map_(runner, td):
return runner.invoke(
tested.app,
[
# fmt: off
"--log-output-path", td,
"create-from-probability-map",
"--annotation-path", "annotation.nrrd",
"--hierarchy-path", "hierarchy.json",
"--probability-map", "probability_map01.csv",
"--probability-map", "probability_map02.csv",
"--marker", "pv", "pv.nrrd",
"--marker", "sst", "sst.nrrd",
"--marker", "vip", "vip.nrrd",
"--marker", "gad67", "gad67.nrrd",
"--marker", "approx_lamp5", "approx_lamp5.nrrd",
"--synapse-class", "EXC",
"--output-dir", "output_dir",
# fmt: on
],
)


def test_mtype_densities_from_probability_map(tmp_path):
data = create_from_probability_map_data()
runner = CliRunner()
with runner.isolated_filesystem(temp_dir=tmp_path) as td:
data["annotation"].save_nrrd("annotation.nrrd")
write_json("hierarchy.json", data["hierarchy"])
td = Path(td)

data["probability_map01"].to_csv("probability_map01.csv", index=True)
data["probability_map02"].to_csv("probability_map02.csv", index=True)
data["annotation"].save_nrrd(td / "annotation.nrrd")
write_json("hierarchy.json", data["hierarchy"])
data["probability_map01"].to_csv(td / "probability_map01.csv", index=True)
data["probability_map02"].to_csv(td / "probability_map02.csv", index=True)

for molecular_type, raw_data in data["molecular_type_densities"].items():
for molecular_type, raw in data["molecular_type_densities"].items():
VoxelData(
raw_data,
raw,
voxel_dimensions=data["annotation"].voxel_dimensions,
).save_nrrd(f"{molecular_type}.nrrd")

result = get_result_from_probablity_map_(runner, td)
).save_nrrd(td / f"{molecular_type}.nrrd")

result = runner.invoke(
tested.app,
[
# fmt: off
"--log-output-path", str(td),
"create-from-probability-map",
"--annotation-path", "annotation.nrrd",
"--hierarchy-path", "hierarchy.json",
"--probability-map", "probability_map01.csv",
"--probability-map", "probability_map02.csv",
"--marker", "pv", "pv.nrrd",
"--marker", "sst", "sst.nrrd",
"--marker", "vip", "vip.nrrd",
"--marker", "gad67", "gad67.nrrd",
"--marker", "approx_lamp5", "approx_lamp5.nrrd",
"--synapse-class", "EXC",
"--output-dir", "output_dir",
# fmt: on
],
)
assert result.exit_code == 0

BPbAC = VoxelData.load_nrrd(str(Path("output_dir") / "BP|bAC_EXC_densities.nrrd"))
BPbAC = VoxelData.load_nrrd(Path("output_dir") / "BP|bAC_EXC_densities.nrrd")
assert BPbAC.raw.dtype == float
npt.assert_array_equal(BPbAC.voxel_dimensions, data["annotation"].voxel_dimensions)

with open(str(Path("output_dir") / "metadata.json"), "r") as file:
with open(Path("output_dir") / "metadata.json", "r") as file:
metadata = json.load(file)

assert "BP" in metadata["density_files"]
assert "bAC" in metadata["density_files"]["BP"]
assert "EXC" == metadata["synapse_class"]
Expand Down
15 changes: 11 additions & 4 deletions tests/densities/test_fitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_create_dataframe_from_known_densities():


@pytest.fixture
def hierarchy_info():
def region_map():
hierarchy = {
"id": 8,
"name": "Basic cell groups and regions",
Expand Down Expand Up @@ -104,7 +104,12 @@ def hierarchy_info():
],
}

return utils.get_hierarchy_info(RegionMap.from_dict(hierarchy))
return RegionMap.from_dict(hierarchy)


@pytest.fixture
def hierarchy_info(region_map):
return utils.get_hierarchy_info(region_map)


def test_fill_in_homogenous_regions(hierarchy_info):
Expand Down Expand Up @@ -226,7 +231,7 @@ def test_compute_average_intensity():
assert actual == 0


def test_compute_average_intensities(hierarchy_info):
def test_compute_average_intensities(region_map, hierarchy_info):
annotation = np.array(
[[[0, 976], [976, 936]], [[976, 936], [936, 936]]] # 976 = Lobule II, 936 = "Declive (VI)""
)
Expand Down Expand Up @@ -261,7 +266,9 @@ def test_compute_average_intensities(hierarchy_info):
index=hierarchy_info["brain_region"],
)

actual = tested.compute_average_intensities(annotation, marker_volumes, hierarchy_info)
actual = tested.compute_average_intensities(
annotation, marker_volumes, hierarchy_info, region_map
)
pdt.assert_frame_equal(actual, expected)


Expand Down

0 comments on commit fba1e3f

Please sign in to comment.