-
Notifications
You must be signed in to change notification settings - Fork 0
/
statistical2.py
111 lines (93 loc) · 3.46 KB
/
statistical2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import numpy as np
import matplotlib.pyplot as plt
import config
from scipy.ndimage import gaussian_filter1d
from strlearn.utils import scores_to_cummean
from tabulate import tabulate
from scipy import stats
def t_test_corrected(a, b, J=1, k=10):
    """
    Corrected paired t-test for repeated cross-validation scores.

    The variance of the paired differences ``a - b`` is scaled by
    ``1 / (J * k) + 1 / (k - 1)`` before the t statistic is formed, and the
    two-sided p-value is read from a Student t distribution with
    ``J * k - 1`` degrees of freedom.

    Parameters
    ----------
    a, b : array-like
        Paired scores of the two compared methods. The first axis must hold
        exactly ``J * k`` entries; the differences are flattened before the
        variance is computed.
    J : int
        Number of cross-validation repetitions (default 1).
    k : int
        Number of folds per repetition (default 10). Must be at least 2,
        otherwise ``1 / (k - 1)`` divides by zero.

    Returns
    -------
    (t_stat, pval)
        The corrected t statistic (positive when ``a`` scores higher on
        average) and its two-sided p-value.

    Raises
    ------
    ValueError
        If the length of the first axis of ``a`` differs from ``J * k``.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    if J * k != a.shape[0]:
        raise ValueError('%i scores received, but J=%i, k=%i (J*k=%i)' % (
            a.shape[0], J, k, J * k
        ))

    d = a - b
    bar_d = np.mean(d)
    bar_sigma_2 = np.var(d.reshape(-1), ddof=1)
    bar_sigma_2_mod = (1 / (J * k) + 1 / (k - 1)) * bar_sigma_2

    # Degenerate case: all differences are identical, so the variance is zero
    # and the ratio below would be 0/0 (nan) or x/0 with a runtime warning.
    if bar_sigma_2_mod == 0:
        if bar_d == 0:
            # Identical score vectors: no evidence of any difference.
            return 0.0, 1.0
        # Constant non-zero difference: infinitely strong evidence.
        return float(np.sign(bar_d) * np.inf), 0.0

    t_stat = bar_d / np.sqrt(bar_sigma_2_mod)
    # Survival function of |t| doubled gives the two-sided p-value.
    pval = stats.t.sf(np.abs(t_stat), (k * J) - 1) * 2
    return t_stat, pval
# --- Experiment configuration -------------------------------------------
# Everything below is read from the project-local ``config`` module.
weights = config.str_weights()
weigths_names = config.str_weights_names()  # (sic) name is used further down
borders = config.borders()
criteria = config.criteria()
base_clfs = config.base_clfs()
base_clfs_names = config.base_clf_names()

# Column labels of the compared methods; "RAW" is the unmodified base
# classifier, the other three come from the meta results below.
methods = ["RAW", "MEAN", "PREV", "DSCA"]

reps = 10      # first dimension used when reshaping the meta results
chunks = 500   # the reshape below expects ``chunks - 1`` scores per run
alpha = .05    # significance level for the pairwise tests

wn = np.array(weigths_names)

# --- Load and arrange the scores -----------------------------------------
res = np.load('results/res_e1_all.npy')

# The first ``len(base_clfs)`` entries along axis 2 are the raw classifiers.
# NOTE(review): ``squeeze`` drops every length-1 axis — confirm this cannot
# remove a meaningful axis (e.g. a single stream or a single classifier).
raw_clfs_res = res[:, :, :len(base_clfs)].squeeze()
print(raw_clfs_res.shape)

# Average over axis 3, then move the leading axis to the end so the layout
# becomes (streams x clfs x reps) — presumably axis 0 is reps; verify.
mean_raw_clfs_res = np.moveaxis(np.mean(raw_clfs_res, axis=3), 0, -1)

# Remaining entries along axis 2 are the meta results, laid out as
# (reps x streams x base_clfs x criteria x borders x (mean, prev, dsca) x chunks)
meta_res = res[:, :, len(base_clfs):].reshape(
    (reps, len(weights), len(base_clfs), len(criteria), len(borders),
     3, chunks - 1)
)

# Average over the chunk axis:
# (reps x streams x base_clfs x criteria x borders x (mean, prev, dsca))
mean_meta_res = np.mean(meta_res, axis=-1)

# Pick criterion 0 and one border per method -> (reps x streams x base_clfs)
# NOTE(review): MEAN uses border index 5 while PREV and DSCA use the last
# border — confirm this asymmetry is intentional.
mean_mean = mean_meta_res[:, :, :, 0, 5, 0]
mean_prev = mean_meta_res[:, :, :, 0, -1, 1]
mean_dsca = mean_meta_res[:, :, :, 0, -1, 2]

# Stack the methods on a new last axis and reorder to
# (streams x base_clfs x method x reps)
scores = np.stack((mean_mean, mean_prev, mean_dsca), axis=3)
scores = np.transpose(scores, (1, 2, 3, 0))
print(scores.shape)

# Prepend the raw classifier as method 0, matching the order of ``methods``.
scores = np.concatenate(
    (mean_raw_clfs_res[:, :, np.newaxis, :], scores), axis=2
)

# Mean over repetitions: (streams x base_clfs x method)
mean_scores = np.mean(scores, axis=3)
# Build one table per base classifier. For every stream the table gets two
# rows: the mean score of each method, and below it the 1-based indices of the
# methods that each method beats with statistical significance.
# NOTE(review): loop nesting was reconstructed from a copy of this file whose
# indentation had been lost — confirm against the original layout.
n_methods = len(methods)
headers = ["STREAM"] + methods
tables = []
for base_idx, base in enumerate(base_clfs_names):
    print(base)
    rows = []
    for s_idx, stream in enumerate(weigths_names):
        rows.append(["%s" % stream] +
                    ["%.3f" % v for v in mean_scores[s_idx, base_idx, :]])

        # Pairwise corrected t-tests; entry [i, j] compares method i against
        # method j. The diagonal is left at (t=0, p=1): comparing a method
        # with itself can never be significant.
        t_stats = np.zeros((n_methods, n_methods))
        p_vals = np.ones((n_methods, n_methods))
        for i in range(n_methods):
            for j in range(n_methods):
                if i == j:
                    continue
                t_stats[i, j], p_vals[i, j] = t_test_corrected(
                    scores[s_idx, base_idx, i, :],
                    scores[s_idx, base_idx, j, :])

        # Method i is "better" than j when the difference is significant
        # and the statistic is positive.
        better = (p_vals < alpha) & (t_stats > 0)

        summary = ['']
        for i in range(n_methods):
            beaten = [j + 1 for j in np.flatnonzero(better[i])]
            if not beaten:
                summary.append("---")
            elif len(beaten) == n_methods - 1:
                summary.append("all")
            else:
                summary.append(", ".join("%i" % j for j in beaten))
        rows.append(summary)

    print(tabulate(rows, headers=headers))
    tables.append(np.array(rows))

print(tables[0].shape)

# Put the per-classifier tables side by side; the stream-name column is kept
# only from the first table.
whole = np.concatenate(
    [tables[0]] + [table[:, 1:] for table in tables[1:]], axis=1
)

# One group of method columns per base classifier.
headers = ["STREAM"] + len(tables) * methods
print(tabulate(whole, headers=headers, tablefmt="latex_booktabs"))
print(whole.shape)