-
Notifications
You must be signed in to change notification settings - Fork 3
/
data_providers.py
executable file
·54 lines (42 loc) · 1.7 KB
/
data_providers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function
import numpy as np
import pandas as pd
def data_providers(x_data_file='./data/Brain_Integ_X.csv',
                   y_data_file='./data/Brain_Integ_Y.csv'):
    """Load features and labels from two CSV files, keeping ``Censored == 1`` rows.

    The feature file is read without a header row after skipping its first
    line. The label file uses its first line as the header, skips its second
    line, and must provide the columns ``'Survival'`` and ``'Censored'``.
    Only the rows whose ``'Censored'`` value equals 1 are returned.

    NOTE(review): the previous documentation described the retained rows as
    the observations of deceased patients -- confirm that ``Censored == 1``
    encodes that in the data set.
    NOTE(review): the second line of the label file is skipped, presumably so
    that label rows line up with feature rows -- verify against the data.

    Args:
        x_data_file: path of the CSV file holding the input features.
        y_data_file: path of the CSV file holding the label columns.

    Returns:
        A tuple ``(x, y, c)`` of NumPy arrays restricted to the retained rows:
        ``x`` holds the features as float32, ``y`` the ``'Survival'`` values
        cast to int32 and ``c`` the ``'Censored'`` values cast to int32
        (every entry is 1 by construction).

    Side effects:
        Prints the shapes of the three returned arrays.
    """
    features_frame = pd.read_csv(x_data_file, skiprows=[0], header=None)
    labels_frame = pd.read_csv(y_data_file, skiprows=[1], header=0)

    survival_values = labels_frame['Survival'].values
    censor_flags = labels_frame['Censored'].values
    feature_matrix = np.float32(features_frame.values)

    # One boolean mask selects the same rows from all three arrays.
    keep_rows = censor_flags == 1
    kept_survival = survival_values[keep_rows]
    kept_features = feature_matrix[keep_rows]
    kept_flags = censor_flags[keep_rows]

    y = np.asarray(kept_survival, dtype=np.int32)
    x = np.asarray(kept_features)
    c = np.asarray(kept_flags, dtype=np.int32)

    print('Shape of X : ', x.shape)
    print('Shape of Y : ', y.shape)
    print('Shape of C : ', c.shape)
    return (x, y, c)
if __name__ == '__main__':
    # Script entry point: load the default Brain_Integ feature/label files.
    # data_providers prints the shapes of the arrays it returns.
    X_DATA_FILE, Y_DATA_FILE = ('./data/Brain_Integ_X.csv',
                                './data/Brain_Integ_Y.csv')
    data_x, data_y, c = data_providers(x_data_file=X_DATA_FILE,
                                       y_data_file=Y_DATA_FILE)