-
Notifications
You must be signed in to change notification settings - Fork 0
/
linreg.py
145 lines (111 loc) · 5.27 KB
/
linreg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
from scipy.stats import linregress
import numpy as np
from global_func import get_data, get_gate_data
import matplotlib.pyplot as plt
from scipy.stats import t
import os
import csv
# Functions
def get_avg(peptide, data):
"""
Get the average mfi for a peptide in the data provided
:param peptide: the peptide string
:param data: the data to be checked
:return: the average mfi of the peptide in the data list
"""
res = []
for dict_item in data:
if peptide in dict_item:
res.append(float(dict_item[peptide]))
return round(sum(res) / len(res), 3)
def perform_linear_regression(data_min, data_max, elisa_data, analysis, gate, elisa_type):
"""
Performs linear regression with the given data
:param data_min: the minimum value of the gate data
:param data_max: the maximum value of the gate data
:param elisa_data: the elisa data
:param analysis: analysis string (PBMCs or TILs)
:param gate: the gate (CD4, CD8, GD)
:param elisa_type: the ELISA analysis type (TIL or WBA)
:return: None
"""
reg_array = np.random.uniform(low=float(data_min), high=float(data_max), size=(len(elisa_data),))
res = linregress(reg_array, elisa_data)
print(f'R-squared: {res.rvalue**2:.6f}')
tinv = lambda p, df: abs(t.ppf(p / 2, df))
ts = tinv(0.05, len(reg_array) - 2)
print(f"slope (95%): {res.slope:.6f} +/- {ts * res.stderr:.6f}")
print(f"intercept (95%): {res.intercept:.6f}"
f" +/- {ts * res.intercept_stderr:.6f}")
extracted_data = {'R-Squared': round(res.rvalue**2, 6),
'Slope': f'{round(res.slope, 6)} +/- {round(ts + res.stderr, 6)}',
'Intercept': f'{round(res.intercept, 6)} +/- {round(ts * res.intercept_stderr, 6)}'}
filepath = f'LinearRegression/{gate}_IFN-g_{analysis}_vs_ELISA_{elisa_type}'
if not os.path.exists(filepath):
os.makedirs(filepath)
# Write data to sample file
with open(f'{filepath}/data.csv', 'w', newline='\n') as f:
w = csv.writer(f)
w.writerow(extracted_data.keys())
w.writerow(extracted_data.values())
plt.plot(reg_array, elisa_data, 'o', label='data points')
plt.plot(reg_array, res.intercept + res.slope * reg_array, 'r', label='fitted line')
if gate == 'GD':
plt.title(f'γδ+ IFN-γ+ {analysis} VS ELISA {elisa_type}', fontsize=15)
else:
plt.title(f'{gate}+ IFN-γ+ {analysis} VS ELISA {elisa_type}', fontsize=15)
# plt.xticks([])
# plt.yticks([])
if gate == 'GD':
plt.xlabel(f'% γδ+ IFN-γ+ from ICS {analysis}')
else:
plt.xlabel(f'% {gate}+ IFN-γ+ from ICS {analysis}')
plt.ylabel(f'% IFN-γ+ Supernatant Concentration from ELISA {elisa_type}', fontsize=8)
plt.legend()
plt.savefig(f'{filepath}/graph.png')
plt.clf()
print()
# ---------------------------------------------------------------------------------
# Execution starts here
# If results directorydoes not exist, create it
if not os.path.exists('LinearRegression'):
os.makedirs('LinearRegression')
# get elisa data
elisa_wba = get_data('ELISA', 'Reactions', 'WBA', '')
elisa_til = get_data('ELISA', 'Reactions', 'TIL', '')
print(f'Elisa WBA: {elisa_wba}')
print(f'Elisa TIL: {elisa_til}')
# Rebuild elisa data into a dictionary to take only the average of the peptides it holds
final_elisa_wba = {}
for dict_item in elisa_wba:
for k in dict_item.keys():
if k not in final_elisa_wba:
final_elisa_wba[k] = get_avg(k, elisa_wba)
final_elisa_til = {}
for dict_item in elisa_til:
for k in dict_item.keys():
if k not in final_elisa_til:
final_elisa_til[k] = get_avg(k, elisa_til)
print(f'Elisa WBA Values: {list(final_elisa_wba.values())}')
print(f'Elisa TIL Values: {list(final_elisa_til.values())}')
print()
# PBMCs gate percentages vs ELISA WBA
# CD8
cd8_pbmcs_data = [float(list(entry.values())[0]) for entry in get_gate_data('cd8', 'FLOW', 'ICS', 'PBMCs', 'gate_pct')]
perform_linear_regression(min(cd8_pbmcs_data), max(cd8_pbmcs_data), list(final_elisa_wba.values()), 'PBMCs', 'CD8', 'WBA')
# CD4
cd4_pbmcs_data = [list(entry.values())[0] for entry in get_gate_data('cd4', 'FLOW', 'ICS', 'PBMCs', 'gate_pct')]
perform_linear_regression(min(cd4_pbmcs_data), max(cd4_pbmcs_data), list(final_elisa_wba.values()), 'PBMCs', 'CD4', 'WBA')
# GD
gd_pbmcs_data = [list(entry.values())[0] for entry in get_gate_data('gd', 'FLOW', 'ICS', 'PBMCs', 'gate_pct')]
perform_linear_regression(min(gd_pbmcs_data), max(gd_pbmcs_data), list(final_elisa_wba.values()), 'PBMCs', 'GD', 'WBA')
# TILs gate percentages vs ELISA TIL
# CD8
cd8_tils_data = [list(entry.values())[0] for entry in get_gate_data('cd8', 'FLOW', 'ICS', 'TILs', 'gate_pct')]
perform_linear_regression(min(cd8_tils_data), max(cd8_tils_data), list(final_elisa_til.values()), 'TILs', 'CD8', 'TIL')
# CD4
cd4_tils_data = [list(entry.values())[0] for entry in get_gate_data('cd4', 'FLOW', 'ICS', 'TILs', 'gate_pct')]
perform_linear_regression(min(cd4_tils_data), max(cd4_tils_data), list(final_elisa_til.values()), 'TILs', 'CD4', 'TIL')
# GD
gd_tils_data = [list(entry.values())[0] for entry in get_gate_data('gd', 'FLOW', 'ICS', 'TILs', 'gate_pct')]
perform_linear_regression(min(gd_tils_data), max(gd_tils_data), list(final_elisa_til.values()), 'TILs', 'GD', 'TIL')