-
Notifications
You must be signed in to change notification settings - Fork 0
/
eda.py
30 lines (23 loc) · 885 Bytes
/
eda.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import plot
import decomposition
import load
import manifold
import pandas as pd
import clustering
from scipy import sparse as sp
import numpy as np
def read_headerless_csv(path):
    """Read a CSV file that has no header row into a DataFrame.

    ``np.savetxt`` writes bare numeric rows. ``pd.read_csv`` would by default
    consume the first of those rows as column names, silently dropping one
    observation; ``header=None`` keeps every row as data and labels the
    columns 0..n-1.

    Args:
        path: Location of the header-less, comma-separated file.

    Returns:
        A DataFrame with one row per line in the file.
    """
    return pd.read_csv(path, header=None)


def main():
    """Run the exploratory pipeline: SVD, manifold embedding, clustering.

    Intermediate results are written to CSV files in the working directory
    and read back before the next stage.
    """
    # Stage 1: decompose the covariates and persist the result.
    covariates = load.load_covariates("covariates.csv")
    pc = decomposition.decompose_svd(covariates, n_pc=100)
    np.savetxt("pc_100.csv", pc, delimiter=",")

    # get_svd() takes no arguments, so it presumably returns state kept by
    # the decomposition module after decompose_svd() - TODO confirm.
    svd = decomposition.get_svd()
    plot.plot_svd_redundancy(svd)

    # Stage 2: embed the saved components in 3 dimensions.
    # The file was written without a header, so it must be read without one;
    # otherwise the first row is lost and pc no longer matches outcome.
    pc = read_headerless_csv("pc_100.csv")
    outcome = load.load_outcome("outcome.csv")
    manifold_df = manifold.fit_manifold(pc, outcome, technique="isomap", dim=3)
    manifold_df.to_csv("isomap_pc_100_3d.csv", sep=",")

    # NOTE(review): to_csv() writes the index as a leading column by default,
    # so for an unnamed index this read brings it back as an extra
    # "Unnamed: 0" column. Left unchanged because the plotting/clustering
    # helpers are not visible here - confirm whether they expect that column.
    manifold_df = pd.read_csv("isomap_pc_100_3d.csv")
    plot.plot_manifold(manifold_df)

    # Stage 3: cluster the embedding and plot the result.
    cluster_out = clustering.fit_cluster(
        manifold_df, n_clusters=5, technique="agglomerative"
    )
    plot.plot_cluster(cluster_out)


if __name__ == "__main__":
    main()