-
Notifications
You must be signed in to change notification settings - Fork 0
/
bootstrapfrac.py
146 lines (107 loc) · 4.55 KB
/
bootstrapfrac.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import itertools, sys, os
from itertools import chain
import networkx as nx
from transform_labels_to_nx import *
from look_up_table import *
import random
from numpy import std
from math import sqrt
from scipy import stats
# Module-level lookup table, built once at import time by look_up_table().
# bootstrapfrac._values reads it as look_up[str(metric)][int(n)].
look_up = look_up_table()
class bootstrapfrac:
    """Bootstrap test on the fraction of values lying below a threshold.

    Every sample (an iterable of ids) is translated into a list of
    "percentage_weight_change" values through the module-level ``look_up``
    table.  Each unordered pair of samples is compared by the absolute
    difference between the percentages of their values that fall below a
    threshold (see ``_frac``); ``hypo`` reports, per pair, the share of
    pooled bootstrap replicates whose difference is at least as large as
    the observed one.

    Attributes:
        dataset    -- list of value lists, one per input sample.
        pairs      -- all 2-combinations of ``dataset``.
        mean_delta -- observed mean difference per pair.
        frac_delta -- observed absolute fraction difference per pair.
    """

    def __init__(self, set_of_all_samples):
        """Convert the samples to value lists and compute observed statistics.

        set_of_all_samples -- iterable of iterables of ids; each id ``n`` is
            resolved as ``look_up["percentage_weight_change"][int(n)]``.

        Raises ZeroDivisionError when a sample that takes part in a pair is
        empty, because the observed statistics are computed eagerly.
        """
        self.dataset = self._values(set_of_all_samples, "percentage_weight_change")
        self.pairs = self._pairs(self.dataset)
        # Observed statistics, one entry per pair, in the order of self.pairs.
        self.mean_delta = self._true_mean_difference()
        self.frac_delta = self._true_frac_difference()

    def _pairs(self, dataset):
        """Return every unordered pairing of the lists in ``dataset``."""
        return list(itertools.combinations(dataset, 2))

    def bootstrap(self, N=1000):
        """Build a bootstrap distribution of the fraction difference per pair.

        For each pair the two value lists are pooled.  N times, two resamples
        (with replacement) of the original sizes are drawn from the pool and
        their absolute fraction difference is recorded.

        N -- number of bootstrap replicates per pair.

        Returns a list holding one list of N statistics per pair, in the
        order of ``self.pairs``.

        Side effect: prints, once per pair, the two sample sizes and the
        result of ``scipy.stats.ttest_ind`` on the original lists (the whole
        result object is printed, not only its p-value).
        """
        dist_of_stat = []
        for first, second in self.pairs:
            population = list(itertools.chain(first, second))
            k1, k2 = len(first), len(second)
            print("k1,k2 %s %s" % (k1, k2))
            print("p-value for equivalence of fracs %s"
                  % (stats.ttest_ind(first, second),))
            # The k1-sized resample is drawn before the k2-sized one, so a
            # seeded ``random`` module yields a reproducible sequence.
            dist_of_stat.append([
                self._frac_difference(self._sample_wr(population, k1),
                                      self._sample_wr(population, k2))
                for _ in range(N)
            ])
        return dist_of_stat

    def _true_mean_difference(self):
        """Return the observed mean difference for every pair in ``self.pairs``."""
        return [self._mean_difference(pair[0], pair[1]) for pair in self.pairs]

    def _true_frac_difference(self):
        """Return the observed fraction difference for every pair in ``self.pairs``."""
        return [self._frac_difference(pair[0], pair[1]) for pair in self.pairs]

    def _mean(self, x):
        """Return the arithmetic mean of ``x`` as a float.

        Raises ZeroDivisionError when ``x`` is empty.
        """
        return float(sum(x)) / float(len(x))

    def _frac(self, list, threshold=-5, data=None):
        """Return the percentage (0-100) of items strictly below ``threshold``.

        list      -- sized iterable of numbers (the parameter name shadows
                     the builtin and is kept only for backward compatibility).
        threshold -- exclusive upper bound; an item equal to it is not counted.
        data      -- unused; retained so existing calls keep working.

        Raises ZeroDivisionError when ``list`` is empty.
        """
        below = sum(1 for item in list if item < threshold)
        return (float(below) / float(len(list))) * 100

    def _mean_difference(self, x, y):
        """Return ``mean(x) - mean(y)`` (signed)."""
        return float(self._mean(x) - self._mean(y))

    def _frac_difference(self, x, y):
        """Return the absolute difference between ``_frac(x)`` and ``_frac(y)``.

        This is the statistic evaluated for every bootstrap replicate, so it
        is kept free of printing and of any extra statistical tests.
        """
        return abs(float(self._frac(x) - self._frac(y)))

    def _values(self, pair, metric):
        """Return, for each list of ids in ``pair``, the values of ``metric``.

        Each id ``n`` is resolved as ``look_up[str(metric)][int(n)]`` and
        converted to float.
        """
        table = look_up[str(metric)]
        return [[float(table[int(n)]) for n in ids] for ids in pair]

    def _sample_wr(self, population, k):
        """Choose ``k`` random elements (with replacement) from ``population``.

        Exactly one ``random.random()`` call is made per drawn element.
        """
        n = len(population)
        rand = random.random
        return [population[int(rand() * n)] for _ in range(k)]

    def _hypothesis_test(self, x, org_stat):
        """Return the share of values in ``x`` that are >= ``org_stat``.

        Raises ZeroDivisionError when ``x`` is empty.
        """
        at_least = sum(1 for item in x if item >= org_stat)
        return float(at_least) / float(len(x))

    def hypo(self):
        """Run the bootstrap test for every pair.

        Returns a list with one value per pair: the fraction of bootstrap
        replicates (default N from ``bootstrap``) whose statistic is at least
        the observed fraction difference of that pair.

        Side effect: prints the number of distributions and the results, in
        addition to whatever ``bootstrap`` prints.
        """
        orig_test_statistics = self._true_frac_difference()
        distribs = self.bootstrap()
        # One distribution exists per pair, so the "3" in the message holds
        # only when exactly three samples were supplied (C(3, 2) == 3).
        print("should be 3 distributions %s" % len(distribs))
        values = [self._hypothesis_test(dist, observed)
                  for dist, observed in zip(distribs, orig_test_statistics)]
        print("test results fractions %s" % (values,))
        return values
if __name__ == "__main__":
    # Optional accelerator: skipped silently when the module is missing.
    # NOTE(review): "pyscho" may be a misspelling of the "psyco" JIT -- confirm.
    # NOTE(review): the original nesting of this try block was lost; it is
    # placed under the __main__ guard here -- confirm.
    try:
        import pyscho
        pyscho.full()
    except ImportError:
        pass
    # Example driver, intentionally disabled.  The final print is disabled
    # together with it: ``m`` is only defined by the commented-out lines, so
    # printing it unconditionally raised NameError.
    # NOTE(review): the demo instantiates ``bootstrapmeans`` rather than
    # ``bootstrapfrac`` -- confirm which class is intended before re-enabling.
    #G = nx.read_gml("./master_adherent.gml")
    #G = transform_labels_to_nx(G)
    #Gcc = nx.connected_component_subgraphs(G)[0]
    #datasets = [Gcc.nodes(),G.nodes()]
    #obj = bootstrapmeans(datasets)
    #t = obj._true_mean_difference()
    #h = obj.bootstrap()
    #m = obj.hypo()
    #print "m", m