forked from ANGSD/angsd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfet.c
109 lines (98 loc) · 3.09 KB
/
fet.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#include <math.h>
#include <stdlib.h>
/* This program is implemented with ideas from this web page:
*
* http://www.langsrud.com/fisher.htm
*/
// log\binom{n}{k}
static double lbinom(int n, int k){
if (k == 0 || n == k) return 0;
return lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1);
}
// n11 n12 | n1_
// n21 n22 | n2_
//-----------+----
// n_1 n_2 | n
// hypergeometric distribution
static double hypergeo(int n11, int n1_, int n_1, int n){
return exp(lbinom(n1_, n11) + lbinom(n-n1_, n_1-n11) - lbinom(n, n_1));
}
typedef struct {
int n11, n1_, n_1, n;
double p;
} hgacc_t;
// incremental version of hypergenometric distribution
static double hypergeo_acc(int n11, int n1_, int n_1, int n, hgacc_t *aux){
if (n1_ || n_1 || n) {
aux->n11 = n11; aux->n1_ = n1_; aux->n_1 = n_1; aux->n = n;
} else { // then only n11 changed; the rest fixed
if (n11%11 && n11 + aux->n - aux->n1_ - aux->n_1) {
if (n11 == aux->n11 + 1) { // incremental
aux->p *= (double)(aux->n1_ - aux->n11) / n11
* (aux->n_1 - aux->n11) / (n11 + aux->n - aux->n1_ - aux->n_1);
aux->n11 = n11;
return aux->p;
}
if (n11 == aux->n11 - 1) { // incremental
aux->p *= (double)aux->n11 / (aux->n1_ - n11)
* (aux->n11 + aux->n - aux->n1_ - aux->n_1) / (aux->n_1 - n11);
aux->n11 = n11;
return aux->p;
}
}
aux->n11 = n11;
}
aux->p = hypergeo(aux->n11, aux->n1_, aux->n_1, aux->n);
return aux->p;
}
double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two){
int i, j, max, min;
double p, q, left, right;
hgacc_t aux;
int n1_, n_1, n;
n1_ = n11 + n12;
n_1 = n11 + n21; n = n11 + n12 + n21 + n22; // calculate n1_, n_1 and n
max = (n_1 < n1_) ? n_1 : n1_; // max n11, for right tail
min = n1_ + n_1 - n;
if (min < 0) min = 0; // min n11, for left tail
*two = *_left = *_right = 1.;
if (min == max) return 1.; // no need to do test
q = hypergeo_acc(n11, n1_, n_1, n, &aux); // the probability of the current table
// left tail
p = hypergeo_acc(min, 0, 0, 0, &aux);
for (left = 0., i = min + 1; p < 0.99999999 * q; ++i) // loop until underflow
left += p, p = hypergeo_acc(i, 0, 0, 0, &aux);
--i;
if (p < 1.00000001 * q) left += p;
else --i;
// right tail
p = hypergeo_acc(max, 0, 0, 0, &aux);
for (right = 0., j = max - 1; p < 0.99999999 * q; --j) // loop until underflow
right += p, p = hypergeo_acc(j, 0, 0, 0, &aux);
++j;
if (p < 1.00000001 * q) right += p;
else ++j;
// two-tail
*two = left + right;
if (*two > 1.) *two = 1.;
// adjust left and right
if (abs(i - n11) < abs(j - n11)) right = 1. - left + q;
else left = 1.0 - right + q;
*_left = left; *_right = right;
return q;
}
#ifdef FET_MAIN
#include <stdio.h>
int main(int argc, char *argv[])
{
char id[1024];
int n11, n12, n21, n22;
double left, right, twotail, prob;
while (scanf("%s%d%d%d%d", id, &n11, &n12, &n21, &n22) == 5) {
prob = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &twotail);
printf("%s\t%d\t%d\t%d\t%d\t%.6g\t%.6g\t%.6g\t%.6g\n", id, n11, n12, n21, n22,
prob, left, right, twotail);
}
return 0;
}
#endif