-
Notifications
You must be signed in to change notification settings - Fork 67
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #112 from broadinstitute/irwin-stats
- Loading branch information
Showing
16 changed files
with
647 additions
and
133 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Seed-stock-137_S2_L001_001 18950 28 60 61 |
15 changes: 15 additions & 0 deletions
15
test/input/TestPerSample/vphaser_one_sample_3libs_expected.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
chr1 55 T C 0.8156 snp 16.1017 C:65:34:45:21:11:8:9:5:0.425 T:13:6:10:6:2:0:1:0:0.425 | ||
chr1 483 T C 0.5142 snp 23.6111 C:34:21:24:11:5:5:5:5:0.9192 T:8:9:5:7:1:1:2:1:0.9192 | ||
chr1 573 T C 0.5073 snp 13.7405 C:66:47:50:29:12:10:4:8:0.1632 T:12:6:6:3:3:2:3:1:0.1632 | ||
chr1 660 C T 0.2772 snp 44.2724 T:109:71:79:43:18:14:12:14:0.6371 C:78:65:55:48:13:11:10:6:0.6371 | ||
chr1 999 T C 0.9182 snp 12.8478 C:527:538:349:356:81:97:97:85:0.3398 T:80:77:60:52:10:15:10:10:0.3398 | ||
chr1 1117 A G 0.838 snp 15.0547 G:449:482:304:327:64:81:81:74:0.7364 A:84:81:57:50:13:14:14:17:0.7364 | ||
chr1 1293 T C 0.09971 snp 17.2697 C:258:245:183:172:32:37:43:36:0.3501 T:63:42:42:29:10:10:11:3:0.3501 | ||
chr2 8766 T C 0.6568 snp 8.33333 C:132:99:96:67:15:15:21:18:0.5642 T:13:8:8:5:3:1:2:2:0.5642 | ||
chr2 8808 T C 0.1318 snp 16.8421 C:130:107:93:74:15:17:22:16:0.7091 T:32:16:22:10:6:0:4:6:0.7091 | ||
chr2 9123 T C 0.6938 snp 14.5161 C:150:168:100:113:25:21:25:34:0.7732 T:24:30:18:21:3:4:3:5:0.7732 | ||
chr2 9151 T C 0.4294 snp 15 C:142:147:98:100:20:18:24:31:0.801 T:22:29:15:20:4:4:3:5:0.801 | ||
chr2 9272 G A 0.2018 snp 17.5214 A:82:111:53:75:14:12:15:24:0.8461 G:28:13:22:5:3:4:4:4:0.8461 | ||
chr2 9687 T C 0.7036 snp 11.8182 C:67:127:43:85:14:19:10:23:0.9523 T:8:18:6:11:1:4:1:3:0.9523 | ||
chr2 9912 T C 0.1715 snp 24.8744 C:104:195:74:123:18:34:12:38:0.9697 T:42:57:27:39:7:9:8:9:0.9697 | ||
chr2 9960 C T 0.3394 snp 11.9481 T:120:219:80:144:22:32:18:43:0.5227 C:13:33:8:22:2:8:3:3:0.5227 |
2 changes: 2 additions & 0 deletions
2
test/input/TestPerSample/vphaser_one_sample_indels_expected.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Seed-stock-137_S2_L001_001 6911 IA d 0.1453 lp 5.04492 d:1059:315:1132:328:1 IA:56:10:56:10:1 IAA:6:0:6:0:1 IAAA:1:0:1:0:1 | ||
Seed-stock-137_S2_L001_001 13181 D1 i 0.3567 lp 5.71688 i:879:160:939:166:1 D1:56:7:56:7:1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,6 @@ | ||
'''A few miscellaneous tools. ''' | ||
from __future__ import division # Division of integers with / should never round! | ||
from math import exp, log, pi | ||
import itertools | ||
# import scipy # Commented out until installation on Travis is working | ||
|
||
__author__ = "[email protected]" | ||
|
||
|
@@ -59,28 +57,6 @@ def intervals(i, n, l): | |
stop = l | ||
return (start,stop) | ||
|
||
|
||
try: | ||
# Python 3.4 | ||
from statistics import mean, median | ||
except ImportError: | ||
# Python <3.4, avoid numpy if these two methods are all we really need | ||
def mean(l): | ||
if len(l)>0: | ||
return float(sum(l))/len(l) | ||
else: | ||
raise Exception("empty list for mean") | ||
def median(l): | ||
if len(l)>0: | ||
half = len(l) // 2 | ||
l.sort() | ||
if len(l) % 2 == 0: | ||
return (l[half-1] + l[half]) / 2.0 | ||
else: | ||
return l[half] | ||
else: | ||
raise Exception("empty list for median") | ||
|
||
# from http://stackoverflow.com/a/312467 | ||
def batch_iterator(iterator, batch_size) : | ||
"""Returns lists of length batch_size. | ||
|
@@ -100,77 +76,3 @@ def batch_iterator(iterator, batch_size) : | |
yield item | ||
item = list(itertools.islice(it, batch_size)) | ||
|
||
def chi2_contingency(contingencyTable) : | ||
""" Return two-tailed p-value for an n x m contingency table using chi-square | ||
distribution. Not recommended if any of the counts or expected counts are | ||
less than 5. Input is a sequence of n sequences of size m. | ||
""" | ||
# Commented out scipy version until scipy installation is working: | ||
# return scipy.stats.chi2_contingency(contingencyTable)[1] | ||
|
||
return 1.0 | ||
|
||
def log_stirling(n) : | ||
"""Return Stirling's approximation for log(n!) using up to n^7 term. | ||
Provides exact right answer (when rounded to int) for 16! and lower. | ||
Correct to 10 digits for 5! and above.""" | ||
n2 = n * n | ||
n3 = n * n2 | ||
n5 = n3 * n2 | ||
n7 = n5 * n2 | ||
return n * log(n) - n + 0.5 * log(2 * pi * n) + \ | ||
1 / 12.0 / n - 1 / 360.0 / n3 + 1 / 1260.0 / n5 - 1 / 1680.0 / n7 | ||
|
||
def fisher_exact(contingencyTable2x2) : | ||
""" Return two-tailed p-value for a 2 x 2 contingency table using Fisher's | ||
exact test. Input is a sequence of 2 sequences of size 2. | ||
""" | ||
# Python code below is accurate to around 1e-11 and is much faster than | ||
# the scipy version. However, if for some reason we want to use the scipy | ||
# version instead, here's the call: | ||
# return scipy.stats.fisher_exact(contingencyTable)[1] | ||
|
||
a, b = contingencyTable2x2[0][0], contingencyTable2x2[0][1] | ||
c, d = contingencyTable2x2[1][0], contingencyTable2x2[1][1] | ||
|
||
def log_choose(n, k) : | ||
k = min(k, n - k) | ||
if k <= 10 : | ||
result = 0.0 | ||
for ii in range(1, k + 1) : | ||
result += log(n - ii + 1) - log(ii) | ||
else : | ||
result = log_stirling(n) - log_stirling(k) - log_stirling(n-k) | ||
return result | ||
|
||
s = a + b | ||
t = a + c | ||
n = a + b + c + d | ||
logChooseNT = log_choose(n, t) | ||
|
||
def mydhyper(a, s, t, n) : | ||
# Probability that intersection of subsets of size s and t of a set with | ||
# n elements has size exactly a. | ||
if max(0, s + t - n) <= a <= min(s, t) : | ||
return exp(log_choose(s, a) + log_choose(n - s, t - a) - logChooseNT) | ||
else : | ||
return 0 | ||
|
||
result = p0 = mydhyper(a, s, t, n) | ||
|
||
# Count up from 0 and down from min(s, t) adding all smaller p-vals | ||
for x in range(a) : | ||
prob = mydhyper(x, s, t, n) | ||
if prob <= p0 : | ||
result += prob | ||
else : | ||
break | ||
for y in range(min(s, t), a, -1) : | ||
prob = mydhyper(y, s, t, n) | ||
if prob <= p0 : | ||
result += prob | ||
else : | ||
break | ||
|
||
return result | ||
|
Oops, something went wrong.