-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy paththresh_fcs.py
72 lines (52 loc) · 2.45 KB
/
thresh_fcs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""
Created on 7 August 2018
@author: Isaac Wong
@email: [email protected]
The script and all the code within is licensed under GNU GENERAL PUBLIC LICENSE
"""
import os
import sys
import numpy as np
import pandas as pd
from skimage.measure import label
if __name__ == "__main__":
root_path = sys.argv[1] # The folder storing data.csv
num_control = int(sys.argv[2]) # The number of control you measured
sd_thresh = sys.argv[3].split() # The threshold of number of sd from the mean
# Create a result directory if no result directory is found
result_path = os.path.join(root_path, 'Results')
if not os.path.exists(result_path):
os.mkdir(result_path)
df = pd.read_csv(os.path.join(root_path, 'data.csv'), index_col=0) # Take in the data.csv from parsing the csv file
# Calculate the average of mean of each control recording
mean = 0
for i in range(num_control):
col_name = df.columns[i]
mean += df[col_name].mean()
mean = mean / num_control
# Calculate the average of sd of each control recording
sd = 0
for i in range(num_control):
col_name = df.columns[i]
sd += df[col_name].std()
sd = sd / num_control
data_list = []
for item in sd_thresh: # Loop through the sd value for the threshold
item = float(item) # Turning string into float for the value
peak_profiles = [item, ] # Initializing a list that will store the peak
threshold = mean + item * sd # Calculate the threshold
df_threshed = df - threshold # Mask the data with threshold
#df_threshed.to_csv(os.path.join(result_path, 'raw_data(thresh-mean+{}sd).csv'.format(item))) # Save the thresholded data
df_threshed[df_threshed <= 0] = 0 # Mask any data point below threshold as zero
for col in df_threshed.columns: # Loop through all columns in the data
temp_arr = label(df_threshed[col].as_matrix()) # Label any contiguous data as a single patch
num_peak = np.max(temp_arr) # Find the number of peak counted
peak_profiles.append(num_peak)
data_list.append(peak_profiles)
if len(sd_thresh) == 1:
data_list = np.array(data_list, dtype=np.int64).reshape((1, len(peak_profiles)))
else:
data_list = np.array(data_list, dtype=np.int64)
# Save the peak profile
np.savetxt(os.path.join(result_path, 'peak_count.csv'), \
data_list, delimiter=',')