Skip to content

Commit

Permalink
Remove need for config file for `atlas-densities cell-densities fit-a…
Browse files Browse the repository at this point in the history
…verage-densities` command

* The updated command doesn't require a combine_markers_ccfv2_config.yaml
* Instead, the command line has:
    --marker=pv:868:PATH/TO/pvalb.nrrd \
    --marker=sst:1001:PATH/TO/SST.nrrd \
    --marker=vip:77371835:PATH/TO/VIP.nrrd \
    --marker=gad67:479:PATH/TO/gad1.nrrd \
    --realigned-slices=atlas_densities/app/data/markers/realigned_slices_ccfv2.json \
    --cell-density-standard-deviations=atlas_densities/app/data/measurements/std_cells.json \
* As discussed in:
    #66
  • Loading branch information
mgeplf committed Feb 20, 2024
1 parent 376c0a8 commit 4ee394e
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 110 deletions.
31 changes: 18 additions & 13 deletions README.rst
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -260,27 +260,32 @@ Fit transfer functions from mean region intensity to neuron density

We fit here transfer functions that describe the relation between mean ISH expression in regions of
the mouse brain and literature regional density estimates (see `Rodarie et al. (2022)`_ for more
details). This step leverages AIBS ISH marker datasets (in their expression form, see also
`fit_average_densities_ccfv2_config.yaml`_) and the previously computed
literature density values.
These transfer functions are used to obtain first estimates of neuron densities in regions not
covered by literature.
details).
This step leverages AIBS ISH marker datasets and the previously computed literature density values.
These transfer functions are used to obtain first estimates of neuron densities in regions not covered by literature.
The result of the following command is a list of first density estimates for each neuron type and
for each region of the annotation volume.
Note the usage of multiple ``--markers`` the format is ``marker name:marker id:path/to/marker.nrrd``.
The ``marker name`` is used for the output columns, the ``marker id`` is used to lookup which slices to use in the ``--realigned-slices-path``.

.. code-block:: bash
# make output folder
mkdir -p data/ccfv2/first_estimates
atlas-densities cell-densities fit-average-densities \
--hierarchy-path=data/1.json \
--annotation-path=data/ccfv2/annotation_25.nrrd \
--neuron-density-path=data/ccfv2/density_volumes/neuron_density.nrrd \
--average-densities-path=data/ccfv2/measurements/lit_densities.csv \
--homogenous-regions-path=data/ccfv2/measurements/homogeneous_regions.csv \
--gene-config-path=atlas_densities/app/data/markers/fit_average_densities_ccfv2_config.yaml \
--fitted-densities-output-path=data/ccfv2/first_estimates/first_estimates.csv \
atlas-densities cell-densities fit-average-densities \
--hierarchy-path=data/1.json \
--annotation-path=data/ccfv2/annotation_25.nrrd \
--neuron-density-path=data/ccfv2/density_volumes/neuron_density.nrrd \
--average-densities-path=data/ccfv2/measurements/lit_densities.csv \
--homogenous-regions-path=data/ccfv2/measurements/homogeneous_regions.csv \
--marker=pv:868:data/ccfv2/marker_volumes/pvalb.nrrd \
--marker=sst:1001:data/ccfv2/marker_volumes/SST.nrrd \
--marker=vip:77371835:data/ccfv2/marker_volumes/VIP.nrrd \
--marker=gad67:479:data/ccfv2/marker_volumes/gad1.nrrd \
--realigned-slices-path=atlas_densities/app/data/markers/realigned_slices_ccfv2.json \
--cell-density-standard-deviations=atlas_densities/app/data/measurements/std_cells.json \
--fitted-densities-output-path=data/ccfv2/first_estimates/first_estimates.csv \
--fitting-maps-output-path=data/ccfv2/first_estimates/fitting.json
Expand Down
81 changes: 43 additions & 38 deletions atlas_densities/app/cell_densities.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
import click
import numpy as np
import pandas as pd
import yaml # type: ignore
from atlas_commons.app_utils import (
EXISTING_FILE_PATH,
assert_properties,
Expand Down Expand Up @@ -91,7 +90,6 @@
EXCITATORY_SPLIT_METADATA = DATA_PATH / "metadata" / "excitatory-inhibitory-splitting.json"
HOMOGENOUS_REGIONS_PATH = DATA_PATH / "measurements" / "homogenous_regions.csv"
HOMOGENOUS_REGIONS_REL_PATH = HOMOGENOUS_REGIONS_PATH.relative_to(AD_PATH)
MARKERS_README_REL_PATH = (DATA_PATH / "markers" / "README.rst").relative_to(AD_PATH)
LINPROG_PATH = "doc/source/bbpp82_628_linprog.pdf"

ALGORITHMS: Dict[str, Callable] = {
Expand Down Expand Up @@ -759,17 +757,6 @@ def measurements_to_average_densities(
"`glia-cell-densities`."
),
)
@click.option(
"--gene-config-path",
type=EXISTING_FILE_PATH,
required=True,
help=(
"Path to the gene markers configuration file. This yaml file contains the paths to the "
"gene marker volumes (nrrd files from AIBS) that will be used to estimate average cell "
"densities accross all AIBS brain regions: PV, SST, VIP and GAD67. See "
f"`{MARKERS_README_REL_PATH}`."
),
)
@click.option(
"--average-densities-path",
required=True,
Expand All @@ -785,12 +772,30 @@ def measurements_to_average_densities(
f"either all inhibitory or all excitatory. Defaults to `{HOMOGENOUS_REGIONS_REL_PATH}`.",
default=HOMOGENOUS_REGIONS_PATH,
)
@click.option(
"--marker",
multiple=True,
type=str,
required=True,
help=("Marker information in the format `marker name`:`marker id`:`path/to/marker.nrrd`"),
)
@click.option(
"--realigned-slices-path",
type=EXISTING_FILE_PATH,
required=True,
help=("JSON file containing mapping of `marker id` to list of slices selected for that marker"),
)
@click.option(
"--cell-density-standard-deviations",
type=EXISTING_FILE_PATH,
required=True,
help=("Standard deviations for cells, in a CSV file"),
)
@click.option(
"--fitted-densities-output-path",
required=True,
help="Path where to write the data frame containing the average cell density of every region"
"found in the brain hierarchy (see --hierarchy-path option) for the cell types marked by the "
"gene markers listed in the gene configuration file (see --gene-config-path option). "
"found in the brain hierarchy (see --hierarchy-path option) for the marked cell types "
"The output file is a CSV file whose first column is a list of region names. The other columns"
" come in pairs for each cell type: ``<cell_type>`` and ``<cell_type>_standard_deviation``."
" Cell types are derived from marker names: ``<cell_type> = <marker>+``.",
Expand All @@ -814,7 +819,9 @@ def fit_average_densities(
annotation_path,
region_name,
neuron_density_path,
gene_config_path,
marker,
realigned_slices_path,
cell_density_standard_deviations,
average_densities_path,
homogenous_regions_path,
fitted_densities_output_path,
Expand All @@ -823,7 +830,6 @@ def fit_average_densities(
): # pylint: disable=too-many-arguments, too-many-locals
"""
Estimate average cell densities of brain regions in `hierarchy_path` for the cell types
marked by the markers listed in `gene_config_path`.
We perform a linear fitting based on average cell densities inferred from the scientific
literature (`average_densities_path`) to estimate average cell densities in regions where
Expand Down Expand Up @@ -877,42 +883,41 @@ def fit_average_densities(

L.info("Loading annotation ...")
annotation = VoxelData.load_nrrd(annotation_path)

L.info("Loading neuron density ...")
neuron_density = VoxelData.load_nrrd(neuron_density_path)
if np.any(neuron_density.raw < 0.0):
raise AtlasDensitiesError(f"Negative density value found in {neuron_density_path}.")

L.info("Loading hierarchy ...")
region_map = RegionMap.load_json(hierarchy_path)
L.info("Loading gene config ...")
with open(gene_config_path, "r", encoding="utf-8") as input_file:
config = yaml.load(input_file, Loader=yaml.FullLoader)

gene_voxeldata = {
gene: VoxelData.load_nrrd(path) for (gene, path) in config["inputGeneVolumePath"].items()
}
# Consistency check
voxel_data = [annotation, neuron_density]
voxel_data += list(gene_voxeldata.values())
assert_properties(voxel_data)
slices = utils.load_json(realigned_slices_path)

gene_marker_volumes = {}
for m in marker:
marker_name, marker_id, marker_path = m.split(":", 3)
gene_marker_volumes[marker_name] = {
"intensity": VoxelData.load_nrrd(marker_path),
"slices": slices[marker_id], # list of integer slice indices
}

assert_properties(
[annotation, neuron_density]
+ [intensity["intensity"] for intensity in gene_marker_volumes.values()]
)

for volume in gene_marker_volumes.values():
volume["intensity"] = volume["intensity"].raw

slices = utils.load_json(config["realignedSlicesPath"])
cell_density_stddev = utils.load_json(config["cellDensityStandardDeviationsPath"])
cell_density_stddev = utils.load_json(cell_density_standard_deviations)
cell_density_stddev = {
# Use the AIBS name attribute as key (this is a unique identifier in 1.json)
# (Ex: former key "|root|Basic cell groups and regions|Cerebrum" -> new key: "Cerebrum")
name.split("|")[-1]: stddev
for (name, stddev) in cell_density_stddev.items()
}

gene_marker_volumes = {
gene: {
"intensity": gene_data.raw,
"slices": slices[config["sectionDataSetID"][gene]], # list of integer slice indices
}
for (gene, gene_data) in gene_voxeldata.items()
}

group_ids_config = utils.load_json(group_ids_config_path)

L.info("Loading average densities dataframe ...")
Expand Down Expand Up @@ -968,7 +973,7 @@ def fit_average_densities(
)
@click.option(
"--algorithm",
type=click.Choice(list(ALGORITHMS.keys())),
type=click.Choice(list(ALGORITHMS)),
required=False,
default="linprog",
help=f"Algorithm to be used. Defaults to 'linprog'. "
Expand Down
20 changes: 1 addition & 19 deletions atlas_densities/app/data/markers/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,28 +52,10 @@ reference volumes.
Fitting of transfer functions from mean region intensity to neuron density
>>>>>>>>>>

The option `--gene-config-path` of the CLI `atlas-densities cell-densities fit-average-densities`
expects a path to a yaml file of the following form:

.. code:: yaml
inputGeneVolumePath:
pv: "pv.nrrd"
sst: "sst.nrrd"
vip: "vip.nrrd"
gad67: "gad67.nrrd"
sectionDataSetID:
pv: 868
sst: 1001
vip: 77371835
gad67: 479
realignedSlicesPath: "realigned_slices_XXX.json"
cellDensityStandardDeviationsPath: "std_cells.json"
The sectionDataSetID values are AIBS dataset identifiers recorded in `realigned_slices_XXX.json`.
An example of this configuration file (`fit_average_densities_ccfv2_config.yaml`) is provided for the
CCFv2 reference volumes.

.. _`Gene Search`: https://mouse.brain-map.org/
.. _`Rodarie et al. (2021)`: https://www.biorxiv.org/content/10.1101/2021.11.20.469384v2
.. _`Eroe et al. (2018)`: https://www.frontiersin.org/articles/10.3389/fninf.2018.00084/full
.. _`Eroe et al. (2018)`: https://www.frontiersin.org/articles/10.3389/fninf.2018.00084/full
61 changes: 21 additions & 40 deletions tests/app/test_cell_densities.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,32 +319,23 @@ def test_measurements_to_average_densities():
assert np.all(actual["measurement_unit"] == "number of cells per mm^3")


def _get_fitting_result(runner):
@pytest.mark.filterwarnings("ignore::atlas_densities.exceptions.AtlasDensitiesWarning")
def test_fit_average_densities():
# fmt: off
args = [
"fit-average-densities",
"--hierarchy-path",
"hierarchy.json",
"--annotation-path",
"annotation.nrrd",
"--neuron-density-path",
"neuron_density.nrrd",
"--gene-config-path",
"gene_config.yaml",
"--average-densities-path",
"average_densities.csv",
"--homogenous-regions-path",
"homogenous_regions.csv",
"--fitted-densities-output-path",
"fitted_densities.csv",
"--fitting-maps-output-path",
"fitting_maps.json",
"--hierarchy-path", "hierarchy.json",
"--annotation-path", "annotation.nrrd",
"--neuron-density-path", "neuron_density.nrrd",
"--average-densities-path", "average_densities.csv",
"--homogenous-regions-path", "homogenous_regions.csv",
"--realigned-slices-path", "realigned_slices.json",
"--cell-density-standard-deviations", "std_cells.json",
"--fitted-densities-output-path", "fitted_densities.csv",
"--fitting-maps-output-path", "fitting_maps.json",
]
# fmt: on

return runner.invoke(tested.app, args)


@pytest.mark.filterwarnings("ignore::atlas_densities.exceptions.AtlasDensitiesWarning")
def test_fit_average_densities():
runner = CliRunner()
with runner.isolated_filesystem():
input_ = get_fitting_input_data()
Expand All @@ -363,23 +354,13 @@ def test_fit_average_densities():
with open("std_cells.json", "w", encoding="utf-8") as out:
json.dump(input_["cell_density_stddevs"], out, indent=1, separators=(",", ": "))

with open("gene_config.yaml", "w", encoding="utf-8") as out:
gene_config = {
"inputGeneVolumePath": {},
"sectionDataSetID": {},
"realignedSlicesPath": "realigned_slices.json",
"cellDensityStandardDeviationsPath": "std_cells.json",
}
for marker, intensity in input_["gene_marker_volumes"].items():
VoxelData(intensity["intensity"], voxel_dimensions=[25.0] * 3).save_nrrd(
marker + ".nrrd"
)
gene_config["inputGeneVolumePath"][marker] = marker + ".nrrd"
gene_config["sectionDataSetID"][marker] = input_["slice_map"][marker]

yaml.dump(gene_config, out)

result = _get_fitting_result(runner)
for marker, intensity in input_["gene_marker_volumes"].items():
VoxelData(intensity["intensity"], voxel_dimensions=[25.0] * 3).save_nrrd(
marker + ".nrrd"
)
args += [f'--marker={marker}:{input_["slice_map"][marker]}:{marker}.nrrd']

result = runner.invoke(tested.app, args)
assert result.exit_code == 0

densities = pd.read_csv("fitted_densities.csv")
Expand All @@ -404,7 +385,7 @@ def test_fit_average_densities():
VoxelData(input_["neuron_density"], voxel_dimensions=(10, 10, 10)).save_nrrd(
"neuron_density.nrrd"
)
result = _get_fitting_result(runner)
result = runner.invoke(tested.app, args)
assert "Negative density value" in str(result.exception)


Expand Down

0 comments on commit 4ee394e

Please sign in to comment.