-
Notifications
You must be signed in to change notification settings - Fork 0
/
clustering_tests.py
122 lines (105 loc) · 4.52 KB
/
clustering_tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import numpy as np
from scipy.stats import binom_test
'''def test_igraph_partitions(partition,pheno_preval,pheno_array):
partition_subgraphs = partition.subgraphs()
pvals = np.zeros(len(pheno_preval))
for subgraph in partition_subgraphs:
result_dict = {}
for vertext in subgraph.vs:
if pheno_array[vertext['name']] != -1:
if pheno_array[vertext['name']] in result_dict:
result_dict[pheno_array[vertext['name']]] += 1
else:
result_dict[pheno_array[vertext['name']]] = 1
cls_size = len(subgraph.vs)
for key in result_dict:
pval = binom_test(result_dict[key],cls_size,pheno_preval[key])
pval = np.abs(np.log10(pval))
if pvals[key] < pval:
pvals[key] = pval
return pvals'''
def test_igraph_partitions(parition,pheno_preval,pheno_array):
partition_subgraphs = parition.subgraphs()
pvals = np.zeros(len(pheno_preval))
for subgraph in partition_subgraphs:
result_dict = {}
for vertex in subgraph.vs:
# phenos = np.where(pheno_array[:,vertex['name']])[0]
phenos = pheno_array[int(vertex['name'])]
if len(phenos) != 0:
for pheno in phenos:
if pheno in result_dict:
result_dict[pheno] += 1
else:
result_dict[pheno] = 1
cls_size = len(subgraph.vs)
for key in result_dict:
pval = binom_test(result_dict[key],cls_size,pheno_preval[key])
pval = np.abs(np.log10(pval))
if pvals[key] < pval:
pvals[key] = pval
return pvals
def test_set_partitions(partition,pheno_preval, pheno_array):
pvals = np.zeros(len(pheno_preval))
for subgraph in partition:
result_dict = {}
for vertex in subgraph:
if pheno_array[vertex] != -1:
if pheno_array[vertex] in result_dict:
result_dict[pheno_array[vertex]] += 1
else:
result_dict[pheno_array[vertex]] = 1
cls_size = len(subgraph)
for key in result_dict:
pval = binom_test(result_dict[key],cls_size,pheno_preval[key])
pval = np.abs(np.log10(pval))
if pvals[key] < pval:
pvals[key] = pval
return pvals
def test_purity_set(partition_list,class_list,node_count):
total_sum = 0
for cluster in partition_list:
max = -1
for class_set in class_list:
pass
def calculate_sim(partition_list,class_list):
sims_list = []
for cluster in partition_list:
temp_sim = []
for node_set in class_list:
temp_sim.append(len(cluster.intersection(node_set)))
sims_list.append(temp_sim)
return sims_list
def calculate_purity(sim_list,total_count):
total_sum = 0
for overlap_list in sim_list:
total_sum += max(overlap_list)
return total_sum/total_count
def calculate_entropy(set_list,total_count):
return -1*np.sum([(len(cluster_set)/total_count)*np.log(len(cluster_set)/total_count) for cluster_set in set_list])
def calculate_NMI(sim_list,partition_list,class_list,total_count):
'''mutual_info = 0
for k in range(len(partition_list)):
for j in range(len(class_list)):
mutual_info += (sim_list[k][j]/total_count)*(np.log(total_count*sim_list[k][j])-np.log(len(partition_list[k])*len(class_list[j])))
#'''
mutual_info = np.sum([(sim_list[k][j]/total_count)*np.log(total_count*sim_list[k][j]/(len(partition_list[k])*len(class_list[j]))) for k in range(len(partition_list)) for j in range(len(class_list)) if sim_list[k][j] != 0])
return 2*mutual_info/(calculate_entropy(partition_list,total_count)+calculate_entropy(class_list,total_count))
def test_mcl_partitions(partition,pheno_preval,pheno_array):
pvals = np.zeros(len(pheno_preval))
for subgraph in partition:
result_dict = {}
for vertext in subgraph:
if pheno_array[vertext] != -1:
if pheno_array[vertext] in result_dict:
result_dict[pheno_array[vertext]] += 1
else:
result_dict[pheno_array[vertext]] = 1
cls_size = len(subgraph)
for key in result_dict:
pval = binom_test(result_dict[key], cls_size, pheno_preval[key])
pval = np.abs(np.log10(pval))
if pvals[key] < pval:
pvals[key] = pval
return pvals
#def test_cluster(member_list)