-
Notifications
You must be signed in to change notification settings - Fork 0
/
two-sample-power-sample-size.py
106 lines (82 loc) · 2.39 KB
/
two-sample-power-sample-size.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python
# coding: utf-8
# Power vs. Sample Size for 15 Relationships
import os
import sys
import numpy as np
from joblib import Parallel, delayed
from hyppo.independence import MGC, Dcorr, Hsic, HHG, CCA, RV
from kmerf import KMERF
from simulations import make_marron_wand_classification, MARRON_WAND_SIMS
sys.path.append(os.path.realpath(".."))
SAMP_SIZES = range(10, 110, 10)
DIMENSION = 10
REPS = range(1000)
SAVE_PATH = "two-sample-p-{}_n-{}_{}".format(
int(DIMENSION), int(SAMP_SIZES[0]), int(SAMP_SIZES[-1])
)
TESTS = {
"KMERF": KMERF(forest="classifier"),
"MGC": MGC(),
"Dcorr": Dcorr(),
"Hsic": Hsic(),
"HHG": HHG(),
"CCA": CCA(),
"RV": RV(),
}
def _sim_slice(X, n):
"""
Generate x, y from each sim
"""
X_t = np.concatenate(
(X[: n // 2], X[SAMP_SIZES[-1] // 2 : SAMP_SIZES[-1] // 2 + n // 2])
)
y_t = np.concatenate((np.zeros(n // 2), np.ones(n // 2)))
return X_t, y_t
def _perm_stat(est, X, n):
"""
Generates null and alternate distributions
"""
X, y = _sim_slice(X, n)
y = y.reshape(-1, 1)
obs_stat = est.statistic(X, y)
permy = np.random.permutation(y)
perm_stat = est.statistic(X, permy)
return obs_stat, perm_stat
def _nonperm_pval(est, X, n):
"""
Generates fast permutation pvalues
"""
X, y = _sim_slice(X, n)
pvalue = est.test(X, y)[1]
return pvalue
def compute_null(rep, est, est_name, sim, n=100):
"""
Calculates empirical null and alternate distribution for each test.
"""
X, _ = make_marron_wand_classification(
n_samples=SAMP_SIZES[-1],
n_dim=DIMENSION,
n_informative=1,
simulation=sim,
seed=rep,
)
#if est_name in ["Dcorr", "Hsic"]:
# pval = _nonperm_pval(est, X, n)
# save_kwargs = {"X": [pval]}
#else:
alt_dist, null_dist = _perm_stat(est, X, n)
save_kwargs = {"X": [alt_dist, null_dist], "delimiter": ","}
np.savetxt(
"{}/{}_{}_{}_{}.txt".format(SAVE_PATH, sim, est_name, n, rep), **save_kwargs
)
# Run this block to regenerate power curves. Note that this takes a very long time!
_ = Parallel(n_jobs=-1, verbose=100)(
[
delayed(compute_null)(rep, est, est_name, sim, n=samp_size)
for rep in REPS
for est_name, est in TESTS.items()
for sim in MARRON_WAND_SIMS.keys()
for samp_size in SAMP_SIZES
]
)