-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
116 lines (108 loc) · 5.03 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import sys, os, csv
import shutil

import numpy as np
import mysql.connector

from structureData import *
from diversityHeatmap import *
from diversity2csv import *
# Main script written by BDS.
'''
def string2bool(s):
if (s.lower() == "true" or s.lower() == "t"):
return True
else:
return False
'''
def calculate_one_metric_on_markers(path,xy_ind,log_flag,maxRange,nSpecies,simMat,metric,slide,region,branch):
    """Compute one diversity metric for every 'Mean' marker column of a CSV.

    The header row of the file at ``path`` is scanned; every column whose
    header contains the text 'Mean' is processed in turn.  For each such
    column the data is loaded with ``structureData``, the heatmap is computed
    with ``diversityHeatmap`` and the result is handed to ``diversity2csv``
    together with an intermediate output path ``<name>_<column index><ext>``.
    Each intermediate file is produced from the previous successful one, so
    results are chained from marker to marker.  The last intermediate file is
    finally copied to ``<name>_diversity<ext>`` next to the input file.

    The mean, median and standard deviation of each heatmap are written to
    the ``heterogeneity_metric`` table (only when finite) under the metric
    name ``<header>-<metric>-mean|median|std``.

    Parameters:
        path      - location of the marker quantitation input file
        xy_ind    - csv column that contains X data (passed to structureData)
        log_flag  - log-transform flag, passed through to structureData
        maxRange  - distance from each cell to examine for diversity
        nSpecies  - number of different species / clusters / categories
        simMat    - similarity matrix selector, passed to diversityHeatmap
        metric    - which diversity metric to compute
        slide, region, branch - names stored with every database row

    If no column can be processed (no 'Mean' header, empty file, or every
    heatmap computation failed) the input file itself is copied to the final
    output path.
    """
    # Assemble the output file paths.
    directory = os.path.dirname(path)
    path_nm, path_ext = os.path.splitext(os.path.basename(path))
    outpath_final = os.path.join(directory, path_nm + '_diversity' + path_ext)

    # Get all the headers in the quantitation file; an empty file yields none.
    with open(path, 'r') as f:
        headers = next(csv.reader(f), [])

    # The value appears twice in the parameters: once for the INSERT and once
    # for the ON DUPLICATE KEY UPDATE clause.
    add_metric = "INSERT INTO heterogeneity_metric (slide_name, region_name, branch_name, metric_name, metric_value) VALUES (%s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE metric_value=%s"

    # Most recent file known to exist; starts as the input so that the final
    # copy is always well defined, even when no marker is processed.
    latest = path

    # Create database connection for results.
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration or the environment.
    cnx = mysql.connector.connect(user='root',password='thrivemysql',host='thrive-mysql',database='thrivedb')
    try:
        cursor = cnx.cursor()
        try:
            for i, h in enumerate(headers):
                # Process only the headers that include the word 'Mean'.
                if 'Mean' not in h:
                    continue

                # Chain from the last file that was written successfully.
                inpath = latest
                outpath = os.path.join(directory, path_nm + '_' + str(i) + path_ext)

                xy, feat = structureData(inpath, xy_ind, i, log_flag)
                try:
                    heat, colors = diversityHeatmap(xy, feat, maxRange, nSpecies, simMat, metric)
                except Exception as err:
                    # No new output file was created, so 'latest' is left
                    # unchanged and the next marker reuses it as its input.
                    print('ERROR getting diversity metrics on ' + h + ': ' + str(err))
                    continue

                prefix = h + '-' + metric
                heat_mean = np.mean(heat)
                heat_median = np.median(heat)
                heat_std = np.std(heat)
                print('calculate_one_metric_on_markers(), h: %s, metric: %s, prefix: %s, mean: %f, median: %f, std: %f' % (h, metric, prefix, heat_mean, heat_median, heat_std))
                diversity2csv(prefix, inpath, outpath, heat, colors)

                # Write metrics to database, skipping NaN / infinite values.
                for suffix, value in (('-mean', heat_mean), ('-median', heat_median), ('-std', heat_std)):
                    if np.isfinite(value):
                        cursor.execute(add_metric, (slide, region, branch, prefix + suffix, str(value), str(value)))
                cnx.commit()

                latest = outpath
        finally:
            cursor.close()
    finally:
        cnx.close()

    # Copy the last intermediate file to the final output path without going
    # through a shell, so paths containing spaces or metacharacters are safe.
    shutil.copy(latest, outpath_final)
# Parameters:
# path - location of the marker quantitation input file
# log_flag_bool - whether to perform a log transform
# maxRange - distance from each cell to examine for diversity
# nSpecies - number of different species / clusters / categories of cells
# simMat_bool - how similar the species are to each other: True == identity matrix, False == default matrix
# metric - which diversity metric to compute; allowed values are QE, Shannon, Simpson
# slide, region, branch - names stored with each result row in the heterogeneity_metric table
def metricsForCSV(path, log_flag_bool, maxRange, nSpecies, simMat_bool, metric, slide, region, branch):
    """Convert the boolean options to string flags and process the CSV.

    ``log_flag_bool`` becomes the flag '1' or '0' and ``simMat_bool`` becomes
    'identity' or 'default'; every other argument is forwarded unchanged to
    ``calculate_one_metric_on_markers``.
    """
    # Index of the csv column that contains X data. Y data is assumed to come next.
    x_column = 1
    # The explicit comparison with True is deliberate: only True (or a value
    # equal to it) enables an option, not every truthy value.
    log_setting = '1' if log_flag_bool == True else '0'
    similarity = 'identity' if simMat_bool == True else 'default'
    calculate_one_metric_on_markers(path, x_column, log_setting, maxRange, nSpecies, similarity, metric, slide, region, branch)
'''
'''
def string2bool(s):
    """Return True when *s* spells a true value ('true' or 't', any case).

    bool() cannot be used on command-line text because every non-empty
    string, including 'False', is truthy.
    """
    return s.lower() in ('true', 't')


if __name__ == '__main__':
    # sys.argv[0] is the script name; nine arguments follow it:
    # path, log_flag, maxRange, nSpecies, simMat, metric, slide, region, branch.
    if len(sys.argv) < 10:
        print("Not enough arguments.")
        sys.exit(1)
    # Echo the path to the input file.
    print(sys.argv[1])
    metricsForCSV(sys.argv[1], string2bool(sys.argv[2]), int(sys.argv[3]), int(sys.argv[4]), string2bool(sys.argv[5]), sys.argv[6], sys.argv[7], sys.argv[8], sys.argv[9])