forked from FanGeGo/xxxx
-
Notifications
You must be signed in to change notification settings - Fork 0
/
preprocess.py
44 lines (31 loc) · 1.14 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import numpy as np
import io_helper
# Inclusive upper bound on how many copies of each sample dropin() emits:
# every sample is repeated a random number of times between 0 and this value.
random_data_dup = 10


def dropin(X, y):
    """
    Augment a data set by randomly duplicating (or dropping) its samples.

    The name suggests the inverse of dropout, i.e. adding more samples. See the
    Data Augmentation section at
    http://simaaron.github.io/Estimating-rainfall-from-weather-radar-readings-using-recurrent-neural-networks/

    Sample ``i`` is emitted ``k_i`` times, where each ``k_i`` is drawn
    independently and uniformly from ``0 .. random_data_dup`` (both ends
    inclusive), so a sample may also disappear entirely. Copies of a sample
    are adjacent and the original sample order is kept.

    :param X: numpy array; each row (first axis) is a training sequence
    :param y: numpy array; the target we train on and will later predict,
        one entry per row of X
    :return: new augmented X, y as numpy arrays
    """
    print("X shape:", X.shape)
    print("y shape:", y.shape)
    n_samples = len(X)
    # randint's upper bound is exclusive, so +1 keeps the inclusive range that
    # the deprecated np.random.random_integers(0, random_data_dup) provided.
    copies = np.random.randint(0, random_data_dup + 1, size=n_samples)
    # np.repeat duplicates whole rows in order and, unlike building a Python
    # list, keeps the trailing dimensions and dtype even when nothing is kept.
    # y is trimmed to the rows of X because only those entries are ever paired.
    return np.repeat(X, copies, axis=0), np.repeat(y[:n_samples], copies, axis=0)
def preprocess():
    """
    Load the pickled data array and split it into inputs and targets.

    The array is read from ``array_test.pickle`` through
    ``io_helper.loadfrompickle``. All columns but the last are returned as the
    inputs and the last column as the target; the shapes of both are printed.
    (Assumes the loaded object supports 2-D numpy-style slicing -- confirm
    against io_helper.)

    :return: tuple ``(x_train, y_train)``
    """
    data = io_helper.loadfrompickle("array_test.pickle")
    features, targets = data[:, :-1], data[:, -1]
    print("The train data size is that ")
    for part in (features, targets):
        print(part.shape)
    return features, targets
# Script entry point: run the preprocessing step only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    preprocess()