Skip to content

Commit

Permalink
fix adj. p-value calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
LKremer committed Oct 4, 2024
1 parent 5bbee00 commit 730aa7c
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 3 deletions.
11 changes: 10 additions & 1 deletion methscan/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,15 @@ def calc_fdr(bools):
# prevent div by 0 in the rare case that the best hit is from a permutation
adj_p_val = 1
adj_p_vals[i] = adj_p_val
# iterate through the list from the back to make sure that there are no values
# greater than one, and to make sure it's continuously increasing
current_min = 1.0
for i_rev in np.arange(1, len(bools))[::-1]:
p = adj_p_vals[i_rev]
if p > current_min:
adj_p_vals[i_rev] = current_min
elif p < current_min:
current_min = p
return adj_p_vals


Expand Down Expand Up @@ -117,7 +126,7 @@ def calc_welch_tstat(group1, group2, min_cells):
Calculates the t-statistic according to Welch's t-test for unequal variances.
Mostly copy-paste from calc_welch_tstat_df, this is ugly but gives a slight edge
in performance.
Returns the t-statistic, df, and the two group sizes.
Returns the t-statistic only.
"""
len_g1 = len(group1)
if len_g1 < min_cells:
Expand Down
125 changes: 123 additions & 2 deletions tests/test_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from click.testing import CliRunner

from methscan.cli import cli
from methscan.diff import parse_cell_groups
from methscan.diff import parse_cell_groups, calc_fdr


def test_parse_cell_groups():
Expand Down Expand Up @@ -52,7 +52,7 @@ def test_diff_cli(tmp_path):
assert dmr[1].sum() == 31250980767
assert dmr[2].sum() == 31252060767
assert dmr[3].sum() == -165.62208458348474
assert dmr[11].sum() == 486.24726241344274
assert dmr[11].sum() == 453.1849885684313
assert len(dmr[dmr[9] == "neuroblast"]) == 284
assert dmr[0][493] == 9
assert dmr[2][135] == 58035292
Expand All @@ -63,3 +63,124 @@ def test_diff_cli(tmp_path):
assert np.all(dmr[6] >= 3)
assert np.all(dmr[7] >= 0) and np.all(dmr[7] <= 1)
assert np.all(dmr[8] >= 0) and np.all(dmr[8] <= 1)


FDR_TEST_DATA = [
{
"input": [
True,
True,
False,
],
"output": {"mean": 0.3333333, "<0.05": 2, "<0.1": 2, "<0.5": 2},
},
{
"input": [
False,
False,
True,
],
"output": {"mean": 1, "<0.05": 0, "<0.1": 0, "<0.5": 0},
},
{
"input": [
True,
True,
True,
True,
True,
False,
True,
True,
True,
True,
True,
True,
True,
True,
True,
True,
True,
True,
False,
True,
False,
False,
False,
True,
True,
False,
True,
True,
False,
True,
True,
False,
False,
True,
True,
True,
False,
True,
False,
True,
False,
True,
False,
False,
True,
False,
False,
True,
True,
False,
True,
True,
True,
False,
False,
True,
False,
False,
True,
False,
True,
False,
True,
True,
True,
True,
False,
False,
False,
True,
False,
False,
False,
True,
True,
False,
True,
True,
False,
True,
False,
False,
False,
False,
True,
],
"output": {"mean": 0.5335206, "<0.05": 5, "<0.1": 18, "<0.5": 36},
},
]


def test_calc_fdr():
for test_d in FDR_TEST_DATA:
input_bools = np.array(test_d["input"])
outputs = test_d["output"]
adj_p_vals = calc_fdr(input_bools)
assert np.sum(adj_p_vals < 0.05) == outputs["<0.05"], "n hits <0.05"
assert np.sum(adj_p_vals < 0.1) == outputs["<0.1"], "n hits <0.1"
assert np.sum(adj_p_vals < 0.5) == outputs["<0.5"], "n hits <0.5"
assert round(np.mean(adj_p_vals), 7) == outputs["mean"], "mean adj. p"

0 comments on commit 730aa7c

Please sign in to comment.