-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdrosa.py
165 lines (134 loc) · 4.32 KB
/
drosa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# JP Onnela
# April 20, 2021
# Edited May 17, 2021 by Jonathan Larson
import networkx as nx
import random
import scipy.stats as ss
import time
def generate_DMC(q_mod, q_con, n):
"""Generate DMC model realization given parameters."""
G = nx.Graph()
G.add_edge(0,1)
new_nodes = list(range(2,n))
anchor_nodes = []
for v in new_nodes:
u = random.choice(list(G.nodes()))
anchor_nodes.append(u)
G.add_node(v)
# duplication
G.add_edges_from([(v,w) for w in G.neighbors(u)])
# mutation
for w in list(G.neighbors(u)):
if ss.bernoulli.rvs(q_mod):
edge = random.choice([(v,w), (u,w)])
G.remove_edge(*edge)
# complementation
if ss.bernoulli.rvs(q_con):
G.add_edge(u,v)
return (G, new_nodes, anchor_nodes)
def deconstruct_DMC(G, alpha, beta):
"""Deconstruct a DMC graph over a single step."""
# reverse complementation
if G.has_edge(alpha, beta):
G.remove_edge(alpha, beta)
w = 1
else:
w = 0
# reverse mutation
alpha_neighbors = set(G.neighbors(alpha))
beta_neighbors = set(G.neighbors(beta))
x = len(alpha_neighbors & beta_neighbors)
y = len(alpha_neighbors | beta_neighbors)
for neighbor in alpha_neighbors:
G.add_edge(beta, neighbor)
# reverse duplication
G.remove_node(alpha)
return (w, x, y)
def find_min_uni_pair(G):
"""Find pair of nodes that have minimal cardinality of the union of their neighbors."""
alpha = None
beta = None
union_size = G.number_of_nodes()
nodes = list(G.nodes())
random.shuffle(nodes)
for u in nodes:
for v in nodes:
if u > v:
u_neighbors = set(G.neighbors(u))
v_neighbors = set(G.neighbors(v))
y = len(u_neighbors | v_neighbors)
if G.has_edge(u,v):
y = y - 2
if y < union_size:
union_size = y
alpha = u
beta = v
return (alpha, beta, union_size)
def deconstruct(G):
"""Deconstruct the graph until."""
alphas = []
betas = []
W = 0
X = 0
Y = 0
(alpha, beta, union_size) = find_min_uni_pair(G)
while (not alpha is None and not beta is None):
print("Number of nodes remaining:", G.number_of_nodes())
alphas.append(alpha)
betas.append(beta)
(w, x, y) = deconstruct_DMC(G, alpha, beta)
W += w
X += x
Y += y
(alpha, beta, union_size) = find_min_uni_pair(G)
return (alphas, betas, W, X, Y)
def estimate_parms(W, X, Y, n):
"""Compute estimates of q_mod and q_con parameters."""
q_mod_hat = 1 - X / Y
q_con_hat = W / (n - 1)
return (q_mod_hat, q_con_hat)
def read_edgelist(input_file):
"""Read edgelist from input file"""
G = nx.Graph()
counter = 0
for line in open(input_file):
counter += 1
line = line.rstrip().split("\t")
node_i = line[0]
node_j = line[1]
G.add_edge(node_i, node_j)
return (G, counter)
def print_stats(G, new_nodes, anchor_nodes):
"""Print out some statistics."""
print("Nodes:", G.nodes())
print("Edges:", G.edges())
print("New nodes (alpha):", new_nodes)
print("Anchor nodes (beta):", anchor_nodes)
def save_results(output_file):
F = open(output_file, "w")
# alphas
for alpha in alphas:
F.write(str(alpha) + " ")
F.write("\n")
# betas
for beta in betas:
F.write(str(beta) + " ")
F.write("\n")
# others
F.write(str(W) + " " + str(X) + " " + str(Y) + " " + str(q_mod_hat) + " " + str(q_con_hat))
F.close()
# ----------------------------------------------------------------
# input and output files
input_file = "drosa.tsv"
# read data
(G, counter) = read_edgelist(input_file)
G.remove_edges_from(nx.selfloop_edges(G))
print(G.number_of_edges())
# degenerate graph
n = G.number_of_nodes()
start = time.time()
(alphas, betas, W, X, Y) = deconstruct(G)
end = time.time()
print("Time elapsed:", end - start)
(q_mod_hat, q_con_hat) = estimate_parms(W, X, Y, n)
print("Parameter estimates:", q_mod_hat, q_con_hat)