
Add r_square values to the results of the fitting. #68

Merged (1 commit) on Feb 21, 2024
8 changes: 6 additions & 2 deletions README.rst
@@ -265,8 +265,11 @@ details). This step leverages AIBS ISH marker datasets (in their expression form
 literature density values.
 These transfer functions are used to obtain first estimates of neuron densities in regions not
 covered by literature.
-The result of the following command is a list of first density estimates for each neuron type and
-for each region of the annotation volume.
+
+The results of the following command include a CSV file (`first_estimates.csv`) storing the list
+of first density estimates for each neuron type and for each region of the annotation volume.
+The `fitting.json` output file contains the coefficients fitted by the algorithm together with
+their respective standard deviations and the `coefficient of determination`_ (`r_square`).

 .. code-block:: bash

@@ -449,6 +452,7 @@ Copyright (c) 2022-2024 Blue Brain Project/EPFL
 .. _`mmc3.xlsx`: https://github.com/BlueBrain/atlas-densities/blob/main/atlas_densities/app/data/measurements/mmc3.xlsx
 .. _`gaba_papers.xlsx`: https://github.com/BlueBrain/atlas-densities/blob/main/atlas_densities/app/data/measurements/gaba_papers.xlsx
 .. _`mtypes_probability_map_config.yaml`: https://github.com/BlueBrain/atlas-densities/blob/main/atlas_densities/app/data/mtypes/mtypes_probability_map_config.yaml
+.. _`coefficient of determination`: https://en.wikipedia.org/wiki/Coefficient_of_determination

 .. substitutions

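For orientation, the `fitting.json` described above is the fitted-coefficients dictionary serialized to disk (see the `FittingData` type and the `compute_fitting_coefficients` docstring in the diff below). Here is a hypothetical sketch of consuming it; the flat JSON layout is an assumption based on that docstring, not something this PR spells out:

```python
# Hypothetical sketch: walk the fitting.json output, assuming it is a plain
# JSON dump of the FittingData dictionary
# (region group -> cell type -> fitted statistics).
import json

with open("fitting.json", encoding="utf-8") as f:
    fitting = json.load(f)

for group_name, cell_types in fitting.items():
    for cell_type, stats in cell_types.items():
        # Each entry now carries "coefficient", "standard_deviation" and "r_square".
        print(group_name, cell_type, stats["coefficient"], stats["r_square"])
```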
38 changes: 25 additions & 13 deletions atlas_densities/densities/fitting.py
@@ -285,7 +285,9 @@ def linear_fitting_xy(
         otherwise by 1.0.

     Returns:
-        a dict of the form {"coefficient": <float>, "standard_deviation": <float>}.
+        a dict of the form {"coefficient": <float>,
+                            "standard_deviation": <float>,
+                            "r_square": <float>}.

     Raises:
         AtlasDensitiesError if some of the `sigma` values are negative.
@@ -311,15 +313,24 @@
     least_positive = np.min(sigma[~zero_mask])
     sigma[zero_mask] = least_positive / 2.0

+    def _optimize_func(x, coefficient):
+        return coefficient * np.array(x)
+
     parameters = curve_fit(
-        lambda x, coefficient: coefficient * x,
+        _optimize_func,
         xdata=xdata,
         ydata=ydata,
         sigma=sigma,
         absolute_sigma=True,
     )

-    return {"coefficient": parameters[0][0], "standard_deviation": np.sqrt(parameters[1][0][0])}
+    ss_reg = np.sum((_optimize_func(xdata, parameters[0][0]) - np.mean(ydata)) ** 2)

Collaborator review comment on the `ss_reg` line above: "annoying we have to compute this ourselves."

+    ss_tot = np.sum((np.array(ydata) - _optimize_func(xdata, parameters[0][0])) ** 2) + ss_reg
+    # if the total sum of squares is null, no variance can be explained by the fitting
+    return {
+        "coefficient": parameters[0][0],
+        "standard_deviation": np.sqrt(parameters[1][0][0]),
+        "r_square": (ss_reg / ss_tot) if ss_tot > 0 else np.nan,
+    }


 FittingData = Dict[str, Dict[str, Dict[str, float]]]
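Note that the `r_square` computed here is not the textbook `1 - ss_res / ss_tot` with `ss_tot = sum((y - mean(y))**2)`: for a model forced through the origin, that definition can go negative. Instead the code takes `ss_tot = ss_res + ss_reg`, so the reported ratio always lies in [0, 1]. Below is a standalone re-derivation for illustration only (it assumes strictly positive `sigma`; the real function first rescales zero sigmas, as shown above):

```python
import numpy as np
from scipy.optimize import curve_fit

# Standalone sketch of the r_square computation for a zero-intercept fit
# y ≈ c * x.  R² is taken as ss_reg / (ss_res + ss_reg), which stays in [0, 1].
def fit_with_r_square(xdata, ydata, sigma):
    popt, pcov = curve_fit(
        lambda x, c: c * np.asarray(x),
        xdata=xdata,
        ydata=ydata,
        sigma=sigma,
        absolute_sigma=True,
    )
    fitted = popt[0] * np.asarray(xdata)
    ss_reg = np.sum((fitted - np.mean(ydata)) ** 2)     # explained sum of squares
    ss_res = np.sum((np.asarray(ydata) - fitted) ** 2)  # residual sum of squares
    ss_tot = ss_res + ss_reg
    return {
        "coefficient": popt[0],
        "standard_deviation": float(np.sqrt(pcov[0][0])),
        "r_square": ss_reg / ss_tot if ss_tot > 0 else np.nan,
    }

print(fit_with_r_square([0.0, 1.0, 2.0], [0.0, 1.0, 4.0], [1.0, 1.0, 1e-5]))
```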
@@ -350,23 +361,24 @@ def compute_fitting_coefficients(
         dict of the form (values are fake):
         {
             "Isocortex" : {
-                "gad67": { "coefficient": 0.85, "standard_deviation": 0.15},
-                "pv": { "coefficient": 1.00, "standard_deviation": 0.5},
-                "sst": { "coefficient": 1.07, "standard_deviation": 0.75},
-                "vip": { "coefficient": 0.95, "standard_deviation": 0.15},
+                "gad67": { "coefficient": 0.85, "standard_deviation": 0.15, "r_square": 0.75},
+                "pv": { "coefficient": 1.00, "standard_deviation": 0.5, "r_square": 0.66},
+                "sst": { "coefficient": 1.07, "standard_deviation": 0.75, "r_square": 0.80},
+                "vip": { "coefficient": 0.95, "standard_deviation": 0.15, "r_square": 0.90},
             },
             "Cerebellum" : {
-                "gad67": { "coefficient": 0.75, "standard_deviation": 0.15},
-                "pv": { "coefficient": 1.10, "standard_deviation": 0.55},
-                "sst": { "coefficient": 1.20, "standard_deviation": 0.45},
-                "vip": { "coefficient": 0.95, "standard_deviation": 0.15},
+                "gad67": { "coefficient": 0.75, "standard_deviation": 0.15, "r_square": 0.55},
+                "pv": { "coefficient": 1.10, "standard_deviation": 0.55, "r_square": 0.40},
+                "sst": { "coefficient": 1.20, "standard_deviation": 0.45, "r_square": 0.58},
+                "vip": { "coefficient": 0.95, "standard_deviation": 0.15, "r_square": 0.77},
             },
             ...
         }
         The keys of the dict are the names of the region groups where separate fittings are
         performed. A "coefficient" value is the value of the linear regression coefficient for a
         given region group and a given cell type.
         The "standard_deviation" value is the standard deviation of the coefficient value.
+        The "r_square" value is the coefficient of determination of the fit.
         """

     if len(densities.index) != len(average_intensities.index) or np.any(
@@ -387,7 +399,7 @@

     result = {
         group_name: {
-            cell_type: {"coefficient": np.nan, "standard_deviation": np.nan}
+            cell_type: {"coefficient": np.nan, "standard_deviation": np.nan, "r_square": np.nan}
             for cell_type in cell_types
         }
         for group_name in groups
7 changes: 7 additions & 0 deletions tests/densities/test_fitting.py
@@ -268,15 +268,18 @@ def test_compute_average_intensities(hierarchy_info):
 def test_linear_fitting_xy():
     actual = tested.linear_fitting_xy([0.0, 1.0, 2.0], [0.0, 2.0, 4.0], [1.0, 1.0, 1.0])
     assert np.allclose(actual["coefficient"], 2.0)
+    assert np.allclose(actual["r_square"], 1.0)
     assert not np.isinf(actual["standard_deviation"])

     actual = tested.linear_fitting_xy([0.0, 1.0, 2.0], [0.0, 1.0, 4.0], [1.0, 1.0, 1e-5])
     assert np.allclose(actual["coefficient"], 2.0)
     assert not np.isinf(actual["standard_deviation"])
+    assert np.allclose(actual["r_square"], 0.89286)

     actual = tested.linear_fitting_xy([0.0, 1.0, 2.0], [0.0, 2.0, 4.0], [1.0, 0.0, 1.0])
     assert np.allclose(actual["coefficient"], 2.0)
     assert not np.isinf(actual["standard_deviation"])
+    assert np.allclose(actual["r_square"], 1.0)


 def get_fitting_input_data_(hierarchy_info):
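As a hand check of the `0.89286` expected in the second case above: the tiny sigma on the last point pins the fitted coefficient to 4 / 2 = 2, and the value then follows from the `ss_reg / (ss_res + ss_reg)` definition in `linear_fitting_xy`:

```python
import numpy as np

# Second case: x = [0, 1, 2], y = [0, 1, 4], fitted coefficient ≈ 2.
y = np.array([0.0, 1.0, 4.0])
fitted = 2.0 * np.array([0.0, 1.0, 2.0])   # [0.0, 2.0, 4.0]
ss_reg = np.sum((fitted - y.mean()) ** 2)  # 25/3 ≈ 8.3333
ss_res = np.sum((y - fitted) ** 2)         # 1.0
print(ss_reg / (ss_res + ss_reg))          # 25/28 ≈ 0.892857
```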
@@ -319,6 +322,10 @@ def test_compute_fitting_coefficients(hierarchy_info):
         assert not np.isnan(actual[group_name]["gad67+"]["standard_deviation"])
         assert not np.isinf(actual[group_name]["pv+"]["standard_deviation"])
         assert not np.isnan(actual[group_name]["pv+"]["standard_deviation"])
+    assert actual["Whole"]["pv+"]["r_square"] == 1.0
+    assert actual["Whole"]["gad67+"]["r_square"] == 1.0
+    assert np.isnan(actual["Central lobule"]["pv+"]["r_square"])  # only one value so no variation.
+    assert np.isnan(actual["Central lobule"]["gad67+"]["r_square"])


 def test_compute_fitting_coefficients_exceptions(hierarchy_info):
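Why NaN rather than 1.0 for "Central lobule": with a single literature value, the zero-intercept fit passes through the point exactly and the mean of `ydata` equals that same point, so both sums of squares vanish and the `ss_tot > 0` guard returns NaN. A minimal illustration (the value 3.0 is arbitrary):

```python
import numpy as np

# One data point: the fit is exact and the mean equals the point itself,
# so ss_reg = ss_res = 0 and linear_fitting_xy reports r_square = NaN.
y = np.array([3.0])
fitted = np.array([3.0])                   # exact fit through the single point
ss_reg = np.sum((fitted - y.mean()) ** 2)  # 0.0
ss_res = np.sum((y - fitted) ** 2)         # 0.0
print(np.nan if (ss_reg + ss_res) == 0 else ss_reg / (ss_reg + ss_res))  # nan
```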