Skip to content

Commit

Permalink
Add r_square values to the results of the fitting. (#68)
Browse files Browse the repository at this point in the history
  • Loading branch information
drodarie authored Feb 21, 2024
1 parent 376c0a8 commit 6e76e8c
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 15 deletions.
8 changes: 6 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,11 @@ details). This step leverages AIBS ISH marker datasets (in their expression form
literature density values.
These transfer functions are used to obtain first estimates of neuron densities in regions not
covered by literature.
The result of the following command is a list of first density estimates for each neuron type and
for each region of the annotation volume.

The results of the following command includes a csv file (`first_estimates.csv`) storing the list
of first density estimates for each neuron type and for each region of the annotation volume.
The `fitting.json` output file contains the coefficients fitted by the algorithm together with their
respective standard deviation and `coefficient of determination`_ (`r_square`).

.. code-block:: bash
Expand Down Expand Up @@ -449,6 +452,7 @@ Copyright (c) 2022-2024 Blue Brain Project/EPFL
.. _`mmc3.xlsx`: https://github.com/BlueBrain/atlas-densities/blob/main/atlas_densities/app/data/measurements/mmc3.xlsx
.. _`gaba_papers.xlsx`: https://github.com/BlueBrain/atlas-densities/blob/main/atlas_densities/app/data/measurements/gaba_papers.xlsx
.. _`mtypes_probability_map_config.yaml`: https://github.com/BlueBrain/atlas-densities/blob/main/atlas_densities/app/data/mtypes/mtypes_probability_map_config.yaml
.. _`coefficient of determination`: https://en.wikipedia.org/wiki/Coefficient_of_determination

.. substitutions
Expand Down
38 changes: 25 additions & 13 deletions atlas_densities/densities/fitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,9 @@ def linear_fitting_xy(
otherwise by 1.0.
Returns:
a dict of the form {"coefficient": <float>, "standard_deviation": <float>}.
a dict of the form {"coefficient": <float>,
"standard_deviation": <float>,
"r_square": <float>}.
Raises:
AtlasDensitiesError if some of the `sigma`values are negative.
Expand All @@ -311,15 +313,24 @@ def linear_fitting_xy(
least_positive = np.min(sigma[~zero_mask])
sigma[zero_mask] = least_positive / 2.0

def _optimize_func(x, coefficient):
return coefficient * np.array(x)

parameters = curve_fit(
lambda x, coefficient: coefficient * x,
_optimize_func,
xdata=xdata,
ydata=ydata,
sigma=sigma,
absolute_sigma=True,
)

return {"coefficient": parameters[0][0], "standard_deviation": np.sqrt(parameters[1][0][0])}
ss_reg = np.sum((_optimize_func(xdata, parameters[0][0]) - np.mean(ydata)) ** 2)
ss_tot = np.sum((np.array(ydata) - _optimize_func(xdata, parameters[0][0])) ** 2) + ss_reg
# if total sum of square is null, no variance can be explained by the fitting
return {
"coefficient": parameters[0][0],
"standard_deviation": np.sqrt(parameters[1][0][0]),
"r_square": (ss_reg / ss_tot) if ss_tot > 0 else np.nan,
}


FittingData = Dict[str, Dict[str, Dict[str, float]]]
Expand Down Expand Up @@ -350,23 +361,24 @@ def compute_fitting_coefficients(
dict of the form (values are fake):
{
"Isocortex" : {
"gad67": { "coefficient": 0.85, "standard_deviation": 0.15},
"pv": { "coefficient": 1.00, "standard_deviation": 0.5},
"sst": { "coefficient": 1.07, "standard_deviation": 0.75},
"vip": { "coefficient": 0.95, "standard_deviation": 0.15},
"gad67": { "coefficient": 0.85, "standard_deviation": 0.15, "r_square": 0.75},
"pv": { "coefficient": 1.00, "standard_deviation": 0.5, "r_square": 0.66},
"sst": { "coefficient": 1.07, "standard_deviation": 0.75, "r_square": 0.80},
"vip": { "coefficient": 0.95, "standard_deviation": 0.15, "r_square": 0.90},
},
"Cerebellum" : {
"gad67": { "coefficient": 0.75, "standard_deviation": 0.15},
"pv": { "coefficient": 1.10, "standard_deviation": 0.55},
"sst": { "coefficient": 1.20, "standard_deviation": 0.45},
"vip": { "coefficient": 0.95, "standard_deviation": 0.15},
"gad67": { "coefficient": 0.75, "standard_deviation": 0.15, "r_square": 0.55},
"pv": { "coefficient": 1.10, "standard_deviation": 0.55, "r_square": 0.40},
"sst": { "coefficient": 1.20, "standard_deviation": 0.45, "r_square": 0.58},
"vip": { "coefficient": 0.95, "standard_deviation": 0.15, "r_square": 0.77},
},
...
}
The keys of the dict are the names of the region groups where separate fittings are
performed. A "coefficient" value is the value of the linear regression coefficient for a
given region group and a given cell type.
The "standard_deviation" value is the standard deviation of the coefficient value.
The "r_square" value is the coefficient of determination of the coefficient value.
"""

if len(densities.index) != len(average_intensities.index) or np.any(
Expand All @@ -387,7 +399,7 @@ def compute_fitting_coefficients(

result = {
group_name: {
cell_type: {"coefficient": np.nan, "standard_deviation": np.nan}
cell_type: {"coefficient": np.nan, "standard_deviation": np.nan, "r_square": np.nan}
for cell_type in cell_types
}
for group_name in groups
Expand Down
7 changes: 7 additions & 0 deletions tests/densities/test_fitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,15 +268,18 @@ def test_compute_average_intensities(hierarchy_info):
def test_linear_fitting_xy():
actual = tested.linear_fitting_xy([0.0, 1.0, 2.0], [0.0, 2.0, 4.0], [1.0, 1.0, 1.0])
assert np.allclose(actual["coefficient"], 2.0)
assert np.allclose(actual["r_square"], 1.0)
assert not np.isinf(actual["standard_deviation"])

actual = tested.linear_fitting_xy([0.0, 1.0, 2.0], [0.0, 1.0, 4.0], [1.0, 1.0, 1e-5])
assert np.allclose(actual["coefficient"], 2.0)
assert not np.isinf(actual["standard_deviation"])
assert np.allclose(actual["r_square"], 0.89286)

actual = tested.linear_fitting_xy([0.0, 1.0, 2.0], [0.0, 2.0, 4.0], [1.0, 0.0, 1.0])
assert np.allclose(actual["coefficient"], 2.0)
assert not np.isinf(actual["standard_deviation"])
assert np.allclose(actual["r_square"], 1.0)


def get_fitting_input_data_(hierarchy_info):
Expand Down Expand Up @@ -319,6 +322,10 @@ def test_compute_fitting_coefficients(hierarchy_info):
assert not np.isnan(actual[group_name]["gad67+"]["standard_deviation"])
assert not np.isinf(actual[group_name]["pv+"]["standard_deviation"])
assert not np.isnan(actual[group_name]["pv+"]["standard_deviation"])
assert actual["Whole"]["pv+"]["r_square"] == 1.0
assert actual["Whole"]["gad67+"]["r_square"] == 1.0
assert np.isnan(actual["Central lobule"]["pv+"]["r_square"]) # only one value so no variation.
assert np.isnan(actual["Central lobule"]["gad67+"]["r_square"])


def test_compute_fitting_coefficients_exceptions(hierarchy_info):
Expand Down

0 comments on commit 6e76e8c

Please sign in to comment.