-
Notifications
You must be signed in to change notification settings - Fork 0
/
independence-power-sample-size.py
99 lines (74 loc) · 2.28 KB
/
independence-power-sample-size.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python
# coding: utf-8
# Power vs. Sample Size for 20 Relationships
import os
import sys
import numpy as np
from joblib import Parallel, delayed
from hyppo.independence import MGC, Dcorr, Hsic, HHG, CCA, RV
from kmerf import KMERF
from simulations import indep_sim, INDEPENDENCE_SIMS
sys.path.append(os.path.realpath(".."))
SAMP_SIZES = range(10, 110, 10)
DIMENSION = 10
REPS = range(1000, 10000)
SAVE_PATH = "independence-p-{}_n-{}_{}".format(
int(DIMENSION), int(SAMP_SIZES[0]), int(SAMP_SIZES[-1])
)
TESTS = {
"KMERF": KMERF(forest="regressor"),
"MGC": MGC(),
"Dcorr": Dcorr(),
"Hsic": Hsic(),
"HHG": HHG(),
"CCA": CCA(),
"RV": RV(),
}
def _indep_sim_gen(sim, n, p, noise=True):
"""
Generate x, y from each sim
"""
if sim in ["multiplicative_noise", "multimodal_independence"]:
x, y = indep_sim(sim, n, p)
else:
x, y = indep_sim(sim, n, p, noise=noise)
return x, y
def _perm_stat(est, sim, n=100, p=3, noise=True):
"""
Generates null and alternate distributions
"""
X, y = _indep_sim_gen(sim, n, p, noise=noise)
obs_stat = est.statistic(X, y)
permy = np.random.permutation(y)
perm_stat = est.statistic(X, permy)
return obs_stat, perm_stat
def _nonperm_pval(est, sim, n=100, p=3, noise=True):
"""
Generates fast permutation pvalues
"""
X, y = _indep_sim_gen(sim, n, p, noise=noise)
pvalue = est.test(X, y)[1]
return pvalue
def compute_null(rep, est, est_name, sim, n=100):
"""
Calculates empirical null and alternate distribution for each test.
"""
# if est_name in ["Dcorr", "Hsic"]:
# pval = _nonperm_pval(est, sim, n=n)
# save_kwargs = {"X": [pval]}
# else:
alt_dist, null_dist = _perm_stat(est, sim, n=n)
save_kwargs = {"X": [alt_dist, null_dist], "delimiter": ","}
np.savetxt(
"{}/{}_{}_{}_{}.txt".format(SAVE_PATH, sim, est_name, n, rep), **save_kwargs
)
# Run this block to regenerate power curves. Note that this takes a very long time!
_ = Parallel(n_jobs=-1, verbose=100)(
[
delayed(compute_null)(rep, est, est_name, sim, n=samp_size)
for rep in REPS
for est_name, est in TESTS.items()
for sim in INDEPENDENCE_SIMS.keys()
for samp_size in SAMP_SIZES
]
)