-
Notifications
You must be signed in to change notification settings - Fork 0
/
predict.py
182 lines (170 loc) · 7.88 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# -*- coding: utf-8 -*-
"""
Script that tests the pre-trained model
@author: Christoph Hoog Antink, Maurice Rohr
"""
import csv
import scipy.io as sio
import numpy as np
from ecgdetectors import Detectors
import os
from typing import List, Tuple
import CNNModels.preprocess
import scipy
import pickle
import hrv
import CNNModels.icentiaDataProcessor
import scipy.signal as siglib
from keras.models import Sequential
import tensorflow.keras as keras
import tensorflow as tf
import neurokit2 as nk
import utilz
import pandas as pd
import xgboost as xgb
import warnings
### The signature of this method (parameters and number of return values) must not be changed
def predict_labels(ecg_leads : List[np.ndarray], fs : float, ecg_names : List[str], model_name : str='tree_model.sav',is_binary_classifier : bool=False) -> List[Tuple[str,str]]:
    '''
    Label every ECG recording as 'N' or 'A' with the pipeline selected by
    ``model_name``.

    Parameters
    ----------
    ecg_leads : list of numpy arrays
        ECG signals.
    fs : float
        Sampling frequency of the signals.
    ecg_names : list of str
        Unique identifier for each ECG signal.
    model_name : str
        Selects the pipeline and, where applicable, the model file to load:
        ``'xgboost_augmented.json'`` (XGBoost on neurokit2 features),
        ``'spectro'`` (spectrogram CNN loaded from ``crossval3.h5``), or any
        ``'*.sav'`` file (pickled model fed with heart-rate-variability
        features, e.g. ``tree_model.sav`` or ``gs_grabost.sav``).
    is_binary_classifier : bool
        Accepted for interface compatibility; this implementation does not
        read it.

    Returns
    -------
    predictions : list of tuples
        ``(ecg_name, label)`` pairs, label being ``'N'`` or ``'A'``.

    Raises
    ------
    ValueError
        If ``model_name`` does not select one of the pipelines above.
    '''
#------------------------------------------------------------------------------
# Your code starts here
    use_xgboost = model_name == 'xgboost_augmented.json'
    use_spectro = model_name == 'spectro'
    use_pickled = model_name.endswith('.sav')
    if not (use_xgboost or use_spectro or use_pickled):
        # Fail loudly instead of hitting an unbound `predictions` at the end.
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected "
            "'xgboost_augmented.json', 'spectro' or a '*.sav' file"
        )

    # Nothing to classify: skip model loading and feature extraction.
    if len(ecg_leads) == 0:
        return []

    if use_xgboost:
        labels = _predict_with_xgboost(ecg_leads, fs, model_name)
    elif use_spectro:
        labels = _predict_with_spectrogram_cnn(ecg_leads)
    else:
        labels = _predict_with_pickled_hrv_model(ecg_leads, fs, model_name)

    predictions = list(zip(ecg_names, labels))
#------------------------------------------------------------------------------
    return predictions # list of tuples in the format (ecg_name, label) - must remain unchanged!


def _predict_with_xgboost(ecg_leads, fs, model_name):
    """Return one 'N'/'A' label per lead using the XGBoost model on neurokit2 features."""
    # NOTE(review): this silences warnings process-wide and is never undone.
    warnings.filterwarnings('ignore')
    # NOTE(security): pickle executes arbitrary code on load; only ship trusted files.
    with open('dftemplate.pkl', 'rb') as target:
        dftemplate = pickle.load(target)  # template dataframe fixing the feature columns
    feature_columns = dftemplate.columns.to_list()

    model = xgb.XGBClassifier()
    model.load_model(model_name)

    labels = []
    for ecg_lead in ecg_leads:
        try:
            # Preprocess with neurokit and compute its features plus our own.
            signals, _info = nk.ecg_process(ecg_lead, sampling_rate=fs, method='neurokit')
            analyzed = nk.ecg_analyze(signals, sampling_rate=fs)
            own = utilz.ownFeatures(signals)
            analyzed = pd.concat([analyzed, own], axis=1)
            analyzed.replace([np.inf, -np.inf], np.nan, inplace=True)
            # Stack under the template so column order and count match it, then
            # take the second row (presumably the template holds exactly one
            # row - TODO confirm).
            analyzed = pd.concat([dftemplate, analyzed], axis=0)
            analyzed = analyzed[feature_columns].iloc[1].to_frame().T
            prediction = model.predict(analyzed)
            label = 'N' if prediction[0] == 0 else 'A'
        except Exception:
            # Best effort: a recording that cannot be analyzed is marked as normal.
            label = 'N'
        labels.append(label)
    return labels


def _predict_with_spectrogram_cnn(ecg_leads):
    """Return one 'N'/'A' label per lead using the spectrogram CNN in ``crossval3.h5``."""
    spectrograms = []
    for ecg_lead in ecg_leads:
        # Cut or extend every signal to 9000 samples (per the original author's
        # note). The 300 Hz rate is hard-coded on this path; the caller's `fs`
        # is not used.
        signal = CNNModels.icentiaDataProcessor.preprocessData(ecg_lead, 9000, 300, False)
        _freqs, _times, power = siglib.spectrogram(signal, fs=300, nfft=512, nperseg=64)
        power = 10 * np.log10(power + 1e-12)  # log scale; the offset avoids log10(0)
        # Append a trailing axis of length 1 before batching.
        spectrograms.append(np.reshape(power, (power.shape[0], power.shape[1], 1)))

    batch = np.asarray(spectrograms)
    model = keras.models.load_model('crossval3.h5')
    scores = np.asarray(model.predict(batch))

    # argmax resolves ties to the first index and always yields exactly one
    # label per record, so labels stay aligned with ecg_names.
    # NOTE(review): class 1 is 'A'; any other winning class is reported as 'N'
    # - confirm this matches the class layout of crossval3.h5.
    winners = np.argmax(scores, axis=1)
    return ['A' if winner == 1 else 'N' for winner in winners]


def _predict_with_pickled_hrv_model(ecg_leads, fs, model_name):
    """Return one 'N'/'A' label per lead using a pickled model on HRV features."""
    # `import scipy` alone does not guarantee that the stats submodule is loaded.
    from scipy import stats as scipy_stats

    hrz = hrv.HRV(fs)
    detectors = Detectors(fs)  # QRS detector

    # Column order of the feature matrix handed to the model.
    feature_names = ('sdnn', 'mnn', 'rrskew', 'rrkurt', 'sdsd', 'hr',
                     'rmssd', 'pNN20', 'pNN50', 'NN20', 'NN50')
    columns = {name: np.array([]) for name in feature_names}

    for ecg_lead in ecg_leads:
        ecg_lead = CNNModels.preprocess.ecg_denoise_kalman(ecg_lead)
        r_peaks = detectors.pan_tompkins_detector(ecg_lead)  # detect QRS complexes
        rr_ms = np.diff(r_peaks) / fs * 1000  # beat-to-beat intervals in milliseconds

        features = {
            'sdnn': np.std(rr_ms),  # standard deviation of the intervals (SDNN)
            'mnn': np.mean(rr_ms),
            'rrskew': scipy_stats.skew(rr_ms),
            'rrkurt': scipy_stats.kurtosis(rr_ms),
            'sdsd': hrz.SDSD(r_peaks),
            'hr': hrz.HR(r_peaks),
            'rmssd': hrz.RMSSD(r_peaks),
        }
        # The percentage features are only computed when there are at least two peaks.
        if len(r_peaks) > 1:
            features['pNN20'] = hrz.pNN20(r_peaks)
            features['pNN50'] = hrz.pNN50(r_peaks)
        else:
            features['pNN20'] = np.nan
            features['pNN50'] = np.nan
        features['NN20'] = hrz.NN20(r_peaks)
        features['NN50'] = hrz.NN50(r_peaks)

        # NOTE(review): np.append flattens its argument, so a helper returning
        # an array instead of a scalar would put that column out of step with
        # the others - confirm the return shapes of the hrv helpers.
        for name in feature_names:
            columns[name] = np.append(columns[name], features[name])

    inputArray = np.array(list(zip(*(columns[name] for name in feature_names))))
    inputArray[~np.isfinite(inputArray)] = 0.0  # NaN/inf features become 0

    # NOTE(security): pickle executes arbitrary code on load; only ship trusted files.
    with open(model_name, 'rb') as model_file:
        loaded_model = pickle.load(model_file)

    results = np.rint(loaded_model.predict(inputArray))
    return ['N' if value == 0.0 else 'A' for value in results]