"""pymap main file."""
import math
import time

import numpy as np
import pandas as pd
from scipy.special import binom

# local modules
from libclust import check_volume, get_clust
from libentropy import (calculate_entropies, calculate_pbar_indices,
                        calculate_smap_fast, calculate_smap_inf)
from libio import output_mappings, parse_arguments, system_parameters_setup


def main():
    """Define main function."""
    start_time = time.time()
    args = parse_arguments()
    # read_data
    cleaned_pars = system_parameters_setup(args.parameters)
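    # from its use below, cleaned_pars is expected to provide at least the keys
    # "input_filename", "output_filename" and "max_binom"; the input file is a
    # comma-separated table whose first column is skipped (usecols starts at 1)
    # and whose remaining columns are the variables defining each sampled
    # configuration.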
    with open(cleaned_pars["input_filename"], "r") as f:
        ncols = len(f.readline().split(','))
    print("number of columns in the dataset = ", ncols)
    df = pd.read_csv(
        cleaned_pars["input_filename"],
        sep=",",
        usecols=range(1, ncols)
    )
    print("df shape", df.shape)
    print("df.columns", df.columns)
    n_at = df.shape[1]
    # creating the fully detailed mapping
    print("total number of mappings = ", math.pow(2, n_at) - 1)
    at_mapping = np.array(range(n_at))
    print("df.columns[at_mapping]", df.columns[at_mapping])
    # atomistic clustering: the original dataframe is divided into microstates
    at_clust = get_clust(df, at_mapping)
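    # get_clust is expected to group identical rows of df (restricted to the
    # columns selected by the mapping) into microstates, counting how many rows
    # fall into each one in a "records" column; that count is turned into the
    # probability distribution pr just below.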
print("at_clust", at_clust)
print("atomistic columns", at_clust.columns)
print("at_clust shape", at_clust.shape)
pr = at_clust["records"]/df.shape[0] # at. probability distribution
    # check volume
    V = check_volume(df, ncols)
    # atomistic quantities
    hs_at, hk_at = calculate_entropies(at_clust)
    print("at_clust.columns[at_mapping]", at_clust.columns[at_mapping])
    print("atomistic resolution ", hs_at)  # computing fully at. resolution
    print("atomistic relevance ", hk_at)  # computing fully at. relevance
    cg_mappings = dict()
    cg_mappings_order = []
    # going through the levels of coarse-graining
    for ncg in range(1, n_at + 1):
        elap_time = time.time() - start_time
        print("ncg = ", ncg, ", elapsed time (seconds) = %8.6lf" % (elap_time))
        cg_count = int(binom(n_at, ncg))
        print("cg_count", cg_count)
        k = 0
        max_range = min(cg_count, cleaned_pars["max_binom"])
        print(f"max_range = {max_range}")
        fixed_n_mappings = []
        while k < max_range:
            mapping = np.random.choice(at_mapping, ncg, replace=False)
            mapping.sort()
            key = tuple(mapping)
            if key not in cg_mappings.keys():
                k += 1
                if args.verbose:
                    print("adding key", key, " k = ", k)
                cg_clust = get_clust(df, mapping)
                hs, hk = calculate_entropies(cg_clust)
                smap_inf = calculate_smap_inf(n_at,
                                              ncg,
                                              hs_at,
                                              hs,
                                              V)
                p_bar = calculate_pbar_indices(at_clust,
                                               cg_clust,
                                               df.shape[0],
                                               mapping)
                smap = calculate_smap_fast(mapping, pr, p_bar)
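                # smap is the mapping entropy of this candidate mapping, computed
                # from the overlap p_bar between atomistic and CG microstates,
                # while smap_inf appears to be its infinite-sampling estimate;
                # both are stored together with hs and hk and written out at the end.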
                cg_mappings[key] = (len(mapping),
                                    mapping,
                                    list(at_clust.columns[mapping]),
                                    hs,
                                    hk,
                                    smap,
                                    smap_inf)
                fixed_n_mappings.append(key)
        fixed_n_mappings.sort()
        # extending the original list
        cg_mappings_order.extend(fixed_n_mappings)
    output_mappings(cg_mappings,
                    cg_mappings_order,
                    cleaned_pars["output_filename"])
    print("Total execution time (seconds) %8.6lf" % (time.time() - start_time))


# running main
if __name__ == "__main__":
    main()
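
# Example invocation (sketch): the parameters file path comes from
# args.parameters; assuming parse_arguments exposes it as "--parameters",
#     python pymap.py --parameters parameters.dat
# where parameters.dat sets input_filename, output_filename and max_binom.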