# aa222_finalproject_regression.py
import numpy as np
import matplotlib.pyplot as plt
import h5py
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_squared_error, accuracy_score
from tensorflow.keras.datasets import mnist
# import pdb
# LOADING A SPECIFIC DATASET INTO MATRIX FORM, USABLE BY NEURAL NETWORKS
def datasetLoad(inputSteps=15,traindata='pedestrian_traindata_ohio.hdf5',testdata='pedestrian_testdata_ohio.hdf5'):
    #inputSteps sets the sliding-window size: the first inputSteps-1 time steps of each window (flattened) form the input, and the state at offset inputSteps from the window start is the prediction target
f = h5py.File(traindata,'r')
g = h5py.File(testdata,'r')
k = f['trajectories'][()] - f['failures'][()] #number of successful trajectories, to be turned into training data pairs
print('Training model based on '+str(k)+' full trajectories.')
trajectorySteps = int(f['time'][()]*f['frequency'][()] + 1) #number of time steps in a complete trajectory (default: 151)
generalFirst = True #index to determine if current data point is the first being used
for i in range(k): #add data to input/output pairs for each trajectory in the dataset
# pdb.set_trace()
for j in range(trajectorySteps-inputSteps): #add input/output pairs
if generalFirst:
x = f['pedestrian_states'][j:j+inputSteps-1,:,i].flatten()
y = f['pedestrian_states'][j+inputSteps,:,i]
generalFirst = False
else:
x = np.vstack((x,f['pedestrian_states'][j:j+inputSteps-1,:,i].flatten()))
y = np.vstack((y,f['pedestrian_states'][j+inputSteps,:,i]))
print("input tensor shape (training): ",x.shape)
print("output label matrix shape (training): ",y.shape)
kTest = g['trajectories'][()] - g['failures'][()] #number of successful trajectories, to be turned into training data pairs
print('Validating models with '+str(kTest)+' full trajectories.')
for i in range(kTest): #add data to input/output pairs for each trajectory in the dataset
for j in range(trajectorySteps-inputSteps): #add input/output pairs
            if i == 0 and j == 0:
xTest = g['pedestrian_states'][j:j+inputSteps-1,:,i].flatten()
yTest = g['pedestrian_states'][j+inputSteps,:,i]
else:
xTest = np.vstack((xTest,g['pedestrian_states'][j:j+inputSteps-1,:,i].flatten()))
yTest = np.vstack((yTest,g['pedestrian_states'][j+inputSteps,:,i]))
print("input tensor shape (validation): ",xTest.shape)
print("output label matrix shape (validation): ",yTest.shape)
# Normalization: subtract mean from data without rescaling
    xTrainMean = np.mean(x,axis=0)
    xN = x - xTrainMean #broadcasting subtracts the training-set mean from every row
    xTestN = xTest - xTrainMean #test data are centered with the training mean, not their own
return xN,y,xTestN,yTest
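# A minimal sketch (added for illustration) of the windowing scheme above, assuming a
# states array shaped (time steps, state dimension, trajectories) as in the HDF5 files:
# window j of trajectory i flattens steps j through j+inputSteps-2 into one input row,
# and the state at step j+inputSteps is the regression target.
def windowPair(states,i,j,inputSteps=15):
    x = states[j:j+inputSteps-1,:,i].flatten() #flattened input window
    y = states[j+inputSteps,:,i] #state the network learns to predict
    return x,y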
# LOADING MNIST DATASET, POPULAR FOR TESTING SMALL NEURAL NETWORKS
def mnistLoad():
(x_train,y),(x_test,yTest) = mnist.load_data()
    x = x_train.reshape(np.shape(x_train)[0],-1).astype(float) #flatten each image into a single row
    xTest = x_test.reshape(np.shape(x_test)[0],-1).astype(float)
print("input tensor shape (training): ",x.shape)
print("output label matrix shape (training): ",y.shape)
print("input tensor shape (validation): ",xTest.shape)
print("output label matrix shape (validation): ",yTest.shape)
# Normalization: subtract mean from data without rescaling
    xTrainMean = np.mean(x,axis=0)
    xN = x - xTrainMean #broadcasting subtracts the training-set mean from every row
    xTestN = xTest - xTrainMean #test data are centered with the training mean, not their own
    # # Normalization: subtract mean from data while rescaling
    # xTrainStDev = np.std(x,axis=0)
    # xN = ( x - xTrainMean )/( xTrainStDev + 1e-7 )
    # xTestN = ( xTest - xTrainMean )/( xTrainStDev + 1e-7 )
return xN,y,xTestN,yTest
#DEFINITION OF NEURAL NETWORK STRUCTURE
def BuildModel(architecture,activation_function='relu',regression = True):
    input_dim = architecture[0] #number of neurons matching the input vector (renamed to avoid shadowing the built-in input)
output_dim = architecture[-1] #number of neurons matching output vector
    #Quick check that the first hidden layer has at least one neuron
    if architecture[1] == 0:
        print("Error: the first hidden layer is listed as having no neurons.")
        return None
    networkArch = [Dense(architecture[1],input_dim=input_dim,activation=activation_function)] #initialization of the list describing the network structure
#Parsing the network architecture...
maxLayers = len(architecture) #the length of the tuple is the maximum number of layers the generated network can have
layers = 2 #index used to iterate through the tuple describing network structure (minimum of one layer plus input)
while layers < maxLayers-1 and architecture[layers] != 0:
networkArch += [Dense(architecture[layers],activation=activation_function)]
layers += 1
if layers < maxLayers-1 and any(architecture[layers+1:-1]):
print("Error: a layer is listed as having nonzero neurons after a zero-neuron layer.")
return None
networkArch += [Dense(output_dim,activation='linear')] #output layer, with real-valued outputs
model = Sequential(networkArch) #definition of a sequential model with the layers described by the input tuple
if regression: #regression problem, where the outputs are continuously spaced
model.compile(
'adam', #Adam optimizer, for simplicity
loss ='mean_squared_error',
#metrics=['mean_squared_error'], #include this metric?
)
    else: #classification problem, where the output must choose between several available categories
        model.compile(
            'adam',
            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=[keras.metrics.SparseCategoricalAccuracy()],
        )
    for i in range(len(networkArch)): #give names to each of the layers in the model, for future reference
        model.layers[i]._name = 'layer_' + str(i) #sets the private _name attribute, a common workaround for renaming Keras layers after construction
return model
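# Example (a hedged sketch, not from the original experiments): an architecture tuple
# such as (784,128,64,0,10) should produce two ReLU hidden layers of 128 and 64 neurons
# plus a 10-neuron linear output; the trailing zero marks an unused layer slot, so
# fixed-length tuples can describe networks of varying depth.
# exampleModel = BuildModel((784,128,64,0,10),regression=False)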
#BUILD, TRAIN AND EVALUATE NEURAL NETWORK OF A SPECIFIC ARCHITECTURE
def evaluateModelDesign(model,architecture,x,y,xTest,yTest,save=True,training_epochs=10,regression=True,verbose=True): #"model" is expected to be the output of the BuildModel function, above
if training_epochs > 0: #training_epochs=0 will skip the training process and only evaluate the model performance on the test set, for a previously trained model
model.fit(
x, #training input data
y, #training output labels for supervised learning
epochs=training_epochs, #number of times to iterate over training data
validation_data =(xTest,yTest), #test set for model verification
)
# y_krm = model.predict(x)
# mse_krm = mean_squared_error(y,y_krm)
# print(mse_krm)
# plt.plot(y,label='original y')
# plt.plot(y_krm, label='predicted y')
# plt.legend()
# plt.show()
# plt.plot(yTest,label='original y (validation)')
# plt.plot(model.predict(xTest),label='predicted y (validation)')
# plt.legend()
# plt.show()
if regression:
score = mean_squared_error(yTest,model.predict(xTest))
else:
score = accuracy_score(yTest,np.argmax(model.predict(xTest),axis=1))
    if verbose:
        metricName = 'Mean squared error' if regression else 'Accuracy'
        print(metricName+' of model '+str(architecture)+' on validation data: ',score) #the script will take your word for whatever the specified architecture was, so be careful
if save:
model.save('regression_finalproject_'+str(architecture)+'.h5') #save complete model as a .h5 file, for reference, noting it is for a regression problem
return score
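# Usage sketch (assumed call, matching the signature above): train for five epochs and
# report the validation score without writing a .h5 file to disk.
# score = evaluateModelDesign(model,arch,xN,y,xTestN,yTest,save=False,training_epochs=5)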
def loadModel(architecture):
model = keras.models.load_model('regression_finalproject_'+str(architecture)+'.h5') #this obviously won't work if there is no saved model in the folder with the specified architecture...
return model
#EVALUATION FUNCTION FOR PREVIOUSLY SAVED MODEL, WITH NO OPTION TO TRAIN DNN FURTHER OR SAVE NETWORK AS A SEPARATE FILE
def evaluateModel(model,architecture,xTest,yTest,regression=True,verbose=True):
# y_krm = model.predict(x)
# mse_krm = mean_squared_error(y,y_krm)
# print(mse_krm)
# plt.plot(y,label='original y')
# plt.plot(y_krm, label='predicted y')
# plt.legend()
# plt.show()
# plt.plot(yTest,label='original y (validation)')
# plt.plot(model.predict(xTest),label='predicted y (validation)')
# plt.legend()
# plt.show()
if regression:
score = mean_squared_error(yTest,model.predict(xTest))
else:
score = accuracy_score(yTest,np.argmax(model.predict(xTest),axis=1))
    if verbose:
        metricName = 'Mean squared error' if regression else 'Accuracy'
        print(metricName+' of model with structure '+str(architecture)+' on validation data: ',score)
return score
#SUBFUNCTION FOR PROXY SCORE CALCULATION: HAMMING DISTANCE
def distanceHamming(columnA,columnB): #function takes as input two binary column vectors
return float(sum(columnA != columnB)) #Hamming distance is the total number of entries where the columns are different
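# Worked example (illustrative): distanceHamming(np.array([1,0,1,1]),np.array([1,1,0,1]))
# returns 2.0, since the vectors disagree in exactly two entries.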
#SUBFUNCTION FOR PROXY SCORE CALCULATION: BINARY VECTORS OF RELU ACTIVATIONS, USING MODEL, FOR EACH SAMPLE
def reLUvectors(architecture,model,xSamples):
    layers = sum(1 for n in architecture[1:-1] if n != 0) #number of hidden layers with a nonzero neuron count (input and output excluded)
C = np.zeros((np.shape(xSamples)[0],sum(architecture[1:-1]))) #binary row vectors: one column per neuron in model with ReLU, one row per sample input; ReLU will make entries either positive or turn negative elements to 0
for i in range(layers):
layer_output = model.get_layer('layer_'+str(i)).output #recall name of layer to review
intermediate_model = keras.models.Model(inputs=model.input,outputs=layer_output) #intermediate model between model input and output we are reviewing
intermediate_prediction = intermediate_model.predict(xSamples)
C[:,sum(architecture[1:i+1]):sum(architecture[1:i+2])] = intermediate_prediction
C = 1.*(C > 0.) #conversion of activated outputs from all neurons into binary form
return C.T
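# Illustration (assumed toy numbers): with three hidden neurons and two samples, C before
# transposing might be [[1,0,1],[0,0,1]]; after the transpose, column j of the returned
# matrix is the binary on/off ReLU activation pattern the untrained network assigns to sample j.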
#PROXY SCORE FOR A NEURAL NETWORK OF A SPECIFIC ARCHITECTURE
def evaluateModelProxy(architecture,xTest,numSamples):
if numSamples > np.shape(xTest)[0]:
print("Error: number of samples requested to be used for proxy exceeds number of samples in test dataset.")
return None, None
Na = sum(architecture[1:-1]) #total number of ReLU functions in network (neurons, excluding output)
xSamples = np.random.default_rng().choice(xTest,numSamples,replace=False) #each row of this matrix is a sample from the test dataset to be used in proxy
model = BuildModel(architecture) #the model is built and initialized, but not trained (also, this proxy function is designed for the ReLU activation function)
binaryVectors = reLUvectors(architecture,model,xSamples) #binary vectors indicating where the ReLU was active, or not, for each neuron (one row per neuron) for each test example (one column per example)
    Kh = np.array([[Na - distanceHamming(binaryVectors[:,i],binaryVectors[:,j]) for j in range(numSamples)] for i in range(numSamples)]) #kernel of activation-pattern agreement between every pair of samples
(sign, logscore) = np.linalg.slogdet(Kh)
proxyScore = logscore #for now, ignoring the sign of the determinant
return proxyScore, Kh
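# In the kernel built above, Kh[i,j] = Na - Hamming(c_i,c_j) counts the neurons whose ReLU
# on/off state agrees between samples i and j, so the diagonal entries equal Na. The
# log-determinant score is larger when activation patterns differ more across samples,
# rewarding architectures that can already distinguish inputs before any training.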
def evaluateModelProxyAlt(architecture,xTest,numSamples):
if numSamples > np.shape(xTest)[0]:
print("Error: number of samples requested to be used for proxy exceeds number of samples in test dataset.")
return None, None
Na = sum(architecture[1:-1]) #total number of ReLU functions in network (neurons, excluding output)
xSamples = np.random.default_rng().choice(xTest,numSamples,replace=False) #each row of this matrix is a sample from the test dataset to be used in proxy
model = BuildModel(architecture) #the model is built and initialized, but not trained (also, this proxy function is designed for the ReLU activation function)
binaryVectors = reLUvectors(architecture,model,xSamples) #binary vectors indicating where the ReLU was active, or not, for each neuron (one row per neuron) for each test example (one column per example)
    Kh = np.array([[Na - distanceHamming(binaryVectors[:,i],binaryVectors[:,j]) for j in range(numSamples)] for i in range(numSamples)]) #kernel of activation-pattern agreement between every pair of samples
logscore = np.log(np.sum(Kh)) #alternate logarithmic score function that favors network architectures predisposed to differentiate more between test inputs
# logscore = np.log(np.sum(Kh - Na*np.eye(np.shape(Kh)[0]))) #functionally equivalent to the one above, but slightly slower, use this if the intermediate numbers get so big the code can't differentiate between them
proxyScore = logscore
return proxyScore, Kh
#USING THE PROXY SCORE FUNCTION TO DEVELOP AN EXPLORATION DIRECTION IN HOOKE-JEEVES
def proxyStepDirection(Kh):
step = -1*np.dot(np.linalg.pinv(Kh) , np.ones((np.shape(Kh)[1],1)))
return step/np.linalg.norm(step)
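# A hedged end-to-end sketch (added for illustration, not part of the original
# experiments): load MNIST, score a small candidate architecture with the
# training-free proxy, then build, briefly train, and evaluate the same network.
# The architecture tuple and sample count below are illustrative assumptions.
if __name__ == '__main__':
    xN,y,xTestN,yTest = mnistLoad()
    arch = (784,64,32,0,10) #input, two hidden layers, one unused slot, ten classes
    proxyScore,Kh = evaluateModelProxy(arch,xTestN,32) #proxy uses an untrained network
    print('proxy score (log-determinant of activation kernel): ',proxyScore)
    print('Hooke-Jeeves step direction from proxy kernel: ',proxyStepDirection(Kh).ravel())
    model = BuildModel(arch,regression=False)
    evaluateModelDesign(model,arch,xN,y,xTestN,yTest,save=False,training_epochs=1,regression=False)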