-
Notifications
You must be signed in to change notification settings - Fork 0
/
intersection_sets_by_triads.py
140 lines (77 loc) · 4.79 KB
/
intersection_sets_by_triads.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env python
'''
Code to read the pickle files with the sets of users that are good (or bad) guys
according to different definitions, and calculate the intersections
Created by Julia Poncela, on January 2015.
'''
import pickle
import numpy
from scipy import stats
import itertools
import random
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`, closing the file."""
    # NOTE: pickle can execute arbitrary code while loading; only point this
    # script at result files produced by this project.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _load_user_set(path, cache):
    """Return `(size, set_of_users)` for the pickled list at `path`.

    `size` is the length of the stored list (duplicates included).  Results
    are memoized in `cache` so each file is read and converted only once,
    however many triads it takes part in.
    """
    if path not in cache:
        users = _load_pickle(path)
        cache[path] = (len(users), set(users))
    return cache[path]


def _percentage(count, total):
    """Return `count / total * 100`, or 0.0 when `total` is zero."""
    if total == 0:
        return 0.0
    return count / total * 100.


def _emit(*fields):
    """Print the fields separated by single spaces.

    A single-argument print(...) yields the same text under Python 2 and 3,
    and matches what the statement form `print a, b, c` writes.
    """
    print(" ".join(str(field) for field in fields))


def main(results_dir="../Results", num_randomizations=1000):
    """Report triads of user sets with a small overlap and compare with chance.

    For every threshold (0.25, 0.75), attitude ("good", "bad") and region, the
    file `<results_dir>/list_<att>_guys_<region>_threshold_coop<thr>.pickle`
    is one group of users.  Every combination of three groups whose regions
    differ (ignoring the "higher"/"lower" prefix) is examined.  When the
    three-way intersection is at most 15% of the smallest group, or has at
    most 20 users, the triad is printed together with the average intersection
    of random samples of the same sizes drawn from
    `<results_dir>/list_all_users.pickle`, and the z-score of the observed
    intersection against those random draws.

    Parameters:
        results_dir: directory holding the pickle files.
        num_randomizations: number of random triads drawn per reported triad
            (should be at least 1).

    Returns:
        None; the report is written to standard output.

    Raises:
        IOError/OSError: if a required pickle file cannot be opened.
        ValueError: from random.sample if a group is larger than the list of
            all users.
    """
    thresholds = [0.25, 0.75]
    attitudes = ["good", "bad"]
    regions = ["lowerHarmony", "higherHarmony", "PD", "higherPD", "lowerPD",
               "SH", "SD"]

    all_users = _load_pickle(results_dir + "/list_all_users.pickle")

    # One (label, base_region, path) record per group, e.g. label
    # "good_guys_lowerHarmony_threshold_coop0.75".
    groups = []
    for threshold in thresholds:
        for attitude in attitudes:
            for region in regions:
                label = (attitude + "_guys_" + region + "_threshold_coop"
                         + str(threshold))
                base_region = region.replace("higher", "").replace("lower", "")
                path = results_dir + "/list_" + label + ".pickle"
                groups.append((label, base_region, path))

    cache = {}
    for triad in itertools.combinations(groups, 3):
        # Only compare groups coming from three different base regions.
        if len(set(base for _, base, _ in triad)) < 3:
            continue

        loaded = [_load_user_set(path, cache) for _, _, path in triad]
        sizes = [size for size, _ in loaded]
        # Kept as floats so the printed values read "3.0", "25.0", ...
        intersection = float(len(loaded[0][1] & loaded[1][1] & loaded[2][1]))
        min_size = float(min(sizes))
        percent = _percentage(intersection, min_size)

        # Report only weakly overlapping triads.  (To list strongly
        # overlapping ones instead, test e.g. percent >= 75 or
        # intersection >= 100.)
        if percent > 15. and intersection > 20:
            continue

        for (label, _, _), size in zip(triad, sizes):
            _emit(label, " size:", size)
        _emit(" Intersection between them: ", intersection,
              " (of a max of:", min_size, ")", percent, "% ")

        # NOTE(review): the chance comparison is run only for reported triads;
        # confirm this matches the intended nesting of the original script.
        random_intersections = []
        random_percents = []
        for _ in range(num_randomizations):
            random_sets = [set(random.sample(all_users, size))
                           for size in sizes]
            overlap = float(len(random_sets[0] & random_sets[1]
                                & random_sets[2]))
            random_intersections.append(overlap)
            # Random samples have the same sizes as the real groups, so the
            # smallest size is always min_size.
            random_percents.append(_percentage(overlap, min_size))

        avg_random_intersection = numpy.mean(random_intersections)
        # With zero spread in the random draws the z-score is nan/inf; let
        # numpy produce that value without emitting a RuntimeWarning.
        with numpy.errstate(divide="ignore", invalid="ignore"):
            zscore_size = ((intersection - avg_random_intersection)
                           / numpy.std(random_intersections))

        _emit(" Avg Intersection between random sets of same size: ",
              avg_random_intersection, " (of a max of:", min_size, ")",
              numpy.mean(random_percents), "% zscore of intersect. size:",
              zscore_size, "\n\n")
######################################
######################################
######################################
if __name__ == "__main__":
    # Run the analysis only when the file is executed as a script; main() is
    # invoked without arguments and no command-line options are read.
    main()