Commit 4974c34
Merge pull request #49 from matt-komm/master

fix training

vcepaitis authored Aug 26, 2019
2 parents: efbacee + 7de8ced
Showing 2 changed files with 120 additions and 30 deletions.
Training/nominal_model.py: 14 changes (7 additions, 7 deletions)
@@ -92,23 +92,23 @@ def __init__(self,nclasses,isParametric=False,useLSTM=True,useWasserstein=False,
self.useWasserstein = useWasserstein
with tf.variable_scope("cpf_conv"):
self.cpf_conv = Sequence(scope='cpf_conv')
- self.cpf_conv.add(keras.layers.BatchNormalization())
+ #self.cpf_conv.add(keras.layers.BatchNormalization())
self.cpf_conv.add(Conv(64,1,1,options=options,name="cpf_conv1"))
self.cpf_conv.add(Conv(32,1,1,options=options,name="cpf_conv2"))
self.cpf_conv.add(Conv(32,1,1,options=options,name="cpf_conv3"))
self.cpf_conv.add(Conv(8,1,1,options=options,name="cpf_conv4"))

with tf.variable_scope("npf_conv"):
self.npf_conv = Sequence(scope='npf_conv')
- self.npf_conv.add(keras.layers.BatchNormalization())
+ #self.npf_conv.add(keras.layers.BatchNormalization())
self.npf_conv.add(Conv(32,1,1,options=options,name="npf_conv1"))
self.npf_conv.add(Conv(16,1,1,options=options,name="npf_conv2"))
self.npf_conv.add(Conv(16,1,1,options=options,name="npf_conv3"))
self.npf_conv.add(Conv(4,1,1,options=options,name="npf_conv4"))

with tf.variable_scope("sv_conv"):
self.sv_conv = Sequence(scope='sv_conv')
- self.sv_conv.add(keras.layers.BatchNormalization())
+ #self.sv_conv.add(keras.layers.BatchNormalization())
self.sv_conv.add(Conv(32,1,1,options=options,name="sv_conv1"))
self.sv_conv.add(Conv(16,1,1,options=options,name="sv_conv2"))
self.sv_conv.add(Conv(16,1,1,options=options,name="sv_conv3"))
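Note: the substance of this hunk is the three commented-out BatchNormalization layers at the head of each candidate stack. For orientation only, the repo's Conv(N,1,1,options=...) wrapper is assumed here to act like a per-candidate 1x1 convolution (the same small net applied to every particle candidate); a rough plain-Keras sketch of the cpf stack under that assumption, with a hypothetical input shape and activations, would be:

    from keras.models import Sequential
    from keras.layers import Conv1D

    # Sketch only: the (25 candidates x 18 features) shape and the 'relu'
    # activations are hypothetical, not taken from the repo.
    cpf_conv = Sequential([
        Conv1D(64, kernel_size=1, activation='relu', input_shape=(25, 18)),
        Conv1D(32, kernel_size=1, activation='relu'),
        Conv1D(32, kernel_size=1, activation='relu'),
        Conv1D(8, kernel_size=1, activation='relu'),
    ])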
@@ -128,7 +128,7 @@ def __init__(self,nclasses,isParametric=False,useLSTM=True,useWasserstein=False,
self.global_norm = keras.layers.BatchNormalization()
self.full_features = Sequence(scope='features')
self.full_features.add(keras.layers.Concatenate())
- self.full_features.add(Dense(200,options=options,name="features1"))
+ self.full_features.add(Dense(200,options=options,name="features1",activation=None))
self.full_features.add(keras.layers.Activation('tanh',name="features2"))
#self.full_features.add(keras.layers.GaussianNoise(0.1))
'''
@@ -158,7 +158,7 @@ def gradientReverse(x):
else:
backward = tf.negative(x*tf.exp(tf.abs(x)))
forward = tf.identity(x)
- return backward + tf.stop_gradient(forward - backward)
+ return (backward + tf.stop_gradient(forward - backward))

self.domain_prediction = Sequence(scope='domain_prediction')
self.domain_prediction.add(keras.layers.Lambda(gradientReverse))
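Note: the change above only adds parentheses; the construct itself is the standard gradient-reversal trick from domain-adversarial training (DANN). The expression evaluates to forward (the identity) in the forward pass, while gradients flow through backward only, because tf.stop_gradient blocks the (forward - backward) term. A minimal sketch of the pattern, using a plain -lambda*x reversal (the repo's variant additionally scales by exp(|x|)):

    import tensorflow as tf

    def gradient_reverse(x, hp_lambda=1.0):
        # value == x (identity), gradient == -hp_lambda * upstream gradient
        backward = tf.negative(hp_lambda * x)
        forward = tf.identity(x)
        return backward + tf.stop_gradient(forward - backward)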
@@ -185,9 +185,9 @@ def extractFeatures(self,globalvars,cpf,npf,sv,gen=None):
globalvars_norm = self.global_norm(globalvars)

if self.isParametric:
- full_features = self.full_features([globalvars_norm,gen,cpf_lstm,npf_lstm,sv_lstm])
+ full_features = self.full_features([globalvars,gen,cpf_lstm,npf_lstm,sv_lstm])
else:
- full_features = self.full_features([globalvars_norm,cpf_lstm,npf_lstm,sv_lstm])
+ full_features = self.full_features([globalvars,cpf_lstm,npf_lstm,sv_lstm])

return full_features

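Note: after this change the feature block consumes the raw globalvars, so globalvars_norm is still computed by self.global_norm but no longer used. Removing input normalization here appears consistent with the commented-out BatchNormalization layers in the first hunk and with the commit message "fix training".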
Training/training.py: 136 changes (113 additions, 23 deletions)
@@ -564,6 +564,18 @@ def random_ctau(start,end,v):
h = ((h >> 16) ^ h) * 0x45d9f3b
h = (h >> 16) ^ h
return start+((17+h+h/100+h/10000)%(end-start))
+
+
+ lr_per_epoch = []
+ class_weight_per_epoch = []
+ domain_weight_per_epoch = []
+
+ avgLoss_train_per_epoch = []
+ avgLoss_test_per_epoch = []
+ avgLoss_train_domain_per_epoch = []
+ avgLoss_test_domain_per_epoch = []
+
+

while (epoch < num_epochs):

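Note: the hash body above random_ctau's return (its first mixing line sits behind the fold) is the well-known 32-bit integer mix built on the 0x45d9f3b multiplier; it maps an integer seed v deterministically into [start, end). A self-contained sketch, with the hidden first line assumed and Python 2 integer division kept as in the repo:

    def random_ctau(start, end, v):
        # masks emulate 32-bit overflow; the original lines carry no explicit masks
        h = int(v) & 0xFFFFFFFF
        h = (((h >> 16) ^ h) * 0x45d9f3b) & 0xFFFFFFFF  # assumed hidden first mix
        h = (((h >> 16) ^ h) * 0x45d9f3b) & 0xFFFFFFFF
        h = (h >> 16) ^ h
        return start + ((17 + h + h / 100 + h / 10000) % (end - start))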
@@ -595,7 +607,8 @@ def random_ctau(start,end,v):
#modelTest = setupModelDiscriminator()

classLossWeight = 1.
- domainLossWeight = max(0,epoch-2)/50.+(max(0,epoch-2)/75.)**2. #0.7-0.7*math.exp(-0.03*max(0,epoch-2)**1.5)+0.05*max(0,epoch-2)
+ domainLossWeight = max(0,epoch-2)/25.+(max(0,epoch-2)/50.)**2. #0.7-0.7*math.exp(-0.03*max(0,epoch-2)**1.5)+0.05*max(0,epoch-2)
+ #domainLossWeight = max(0,epoch-2)/25.+(max(0,epoch-2)/25.)**2.

#classLossWeight = 0.3+0.7*math.exp(-0.03*max(0,epoch-2)**1.5)
#since learning rate is decreased increase DA weight at higher epochs
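Note: relative to the replaced line, the new schedule lowers both denominators (50 to 25, 75 to 50), so the domain-adversarial weight ramps up faster while still staying at zero for the first three epochs. A quick pure-Python sketch of the ramp:

    def domain_loss_weight(epoch):
        e = max(0, epoch - 2)
        return e/25. + (e/50.)**2

    for epoch in (0, 3, 10, 25, 50):
        print epoch, domain_loss_weight(epoch)
    # e.g. epoch 10: 8/25. + (8/50.)**2 = 0.32 + 0.0256 = 0.3456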
@@ -605,6 +618,11 @@
classLossWeight = 1.
domainLossWeight = 0

+ lr_per_epoch.append(learning_rate_val)
+ class_weight_per_epoch.append(classLossWeight)
+ domain_weight_per_epoch.append(domainLossWeight)
+
+
def wasserstein_loss(x,y):
return K.mean(x*y)

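Note: with the domain labels mapped to +1/-1 (see the (2.*isData-1) targets further down), K.mean(x*y) is the usual Keras-style Wasserstein critic loss: for balanced batches it is proportional to the difference between the critic's mean score on data and on MC. A numeric sketch with numpy:

    import numpy as np

    y_true = np.array([1., 1., -1., -1.])     # +1 = data, -1 = MC
    y_pred = np.array([0.9, 0.4, -0.2, 0.1])  # critic outputs
    print np.mean(y_true * y_pred)            # (0.9 + 0.4 + 0.2 - 0.1)/4 = 0.35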
@@ -621,7 +639,7 @@ def wasserstein_loss(x,y):
optClass = keras.optimizers.Adam(lr=learning_rate_val, beta_1=0.9, beta_2=0.999)
modelClassDiscriminator.compile(optClass,
loss=classLossFctType, metrics=['accuracy'],
- loss_weights=[classLossWeight])
+ loss_weights=[1.])

classLossFct = modelClassDiscriminator.total_loss #includes also regularization loss
classInputGradients = tf.gradients(classLossFct,modelClassDiscriminator.inputs)
@@ -630,7 +648,7 @@ def wasserstein_loss(x,y):
optDomain = keras.optimizers.Adam(lr=learning_rate_val, beta_1=0.9, beta_2=0.999)
modelDomainDiscriminator.compile(optDomain,
loss=domainLossFctType, metrics=['accuracy'],
- loss_weights=[domainLossWeight])
+ loss_weights=[1.])

domainLossFct = modelDomainDiscriminator.total_loss #includes also regularization loss
domainInputGradients = tf.gradients(domainLossFct,modelDomainDiscriminator.inputs)
@@ -649,7 +667,7 @@ def wasserstein_loss(x,y):
optDomainFrozen = keras.optimizers.Adam(lr=learning_rate_val, beta_1=0.9, beta_2=0.999)
modelDomainDiscriminatorFrozen.compile(optDomainFrozen,
loss=domainLossFctType, metrics=['accuracy'],
- loss_weights=[domainLossWeight])
+ loss_weights=[1.])

if epoch == 0:
print "class network"
@@ -695,6 +713,10 @@ def wasserstein_loss(x,y):

total_loss_train_domain = 0
total_loss_test_domain = 0
+
+
+
+

start_time = time.time()

@@ -718,7 +740,7 @@ def wasserstein_loss(x,y):
continue

if isParametric:
- train_inputs_class = [train_batch_value['gen'][:, 0:1],
+ train_inputs_class = [train_batch_value['gen'],
train_batch_value['globalvars'],
train_batch_value['cpf'],
train_batch_value['npf'],
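Note: the parametric branch now feeds the full gen block instead of only its first column (previously gen[:, 0:1]); the matching changes to the domain inputs and the test inputs below keep all three input signatures consistent.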
@@ -752,24 +774,14 @@ def wasserstein_loss(x,y):
train_inputs_class[igrad]+=direction*classInputGradientsVal[igrad]

if not noDA:
- '''
- train_batch_value_domain_1 = sess.run(train_batch_da)
- train_batch_value_domain_2 = sess.run(train_batch_da)
- if train_batch_value_domain_1['num'].shape[0]==0 or train_batch_value_domain_2['num'].shape[0]==0:
-     continue
- train_batch_value_domain = {}
- iterda = np.random.normal(0,0.1)
- for k in train_batch_value_domain_1.keys():
-     train_batch_value_domain[k] = train_batch_value_domain_1[k]+iterda*(train_batch_value_domain_2[k]-train_batch_value_domain_1[k])
- '''

train_batch_value_domain = sess.run(train_batch_da)
#ctau = np.random.uniform(-2,5,size=(train_batch_value_domain.shape[0],1))

if isParametric:
train_inputs_domain = [
#ctau,
- train_batch_value['gen'][:, 0:1], #use the SAME lifetimes as in MC!!!
+ train_batch_value['gen'], #use the SAME lifetimes as in MC!!!
train_batch_value_domain['globalvars'],
train_batch_value_domain['cpf'],
train_batch_value_domain['npf'],
@@ -811,15 +823,26 @@ def wasserstein_loss(x,y):
train_inputs_class,
train_batch_value["truth"]
)
- train_outputs_domain = [0.,0.]
+ train_outputs_domain = modelDomainDiscriminatorFrozen.test_on_batch(
+     train_inputs_domain,
+     (2.*train_batch_value_domain["isData"]-1) if useWasserstein else train_batch_value_domain["isData"],
+     sample_weight=train_da_weight
+ )
+
+
elif (epoch==0 and (step>30 and step<=60)) or (epoch>0 and step<=30):
#train domain discriminator only while keeping features frozen
train_outputs_domain = modelDomainDiscriminatorFrozen.train_on_batch(
train_inputs_domain,
(2.*train_batch_value_domain["isData"]-1) if useWasserstein else train_batch_value_domain["isData"],
sample_weight=train_da_weight
)
- train_outputs = [0.,0.]
+ train_outputs = modelClassDiscriminator.test_on_batch(
+     train_inputs_class,
+     train_batch_value["truth"]
+ )
+
+
else:
#finally train both discriminators together
train_outputs_fused = modelFusedDiscriminator.train_on_batch(
@@ -832,7 +855,7 @@ def wasserstein_loss(x,y):
)
train_outputs = train_outputs_fused[1],train_outputs_fused[3]
train_outputs_domain = train_outputs_fused[2],train_outputs_fused[4]

else:
#train only class branch if noDA
train_outputs = modelClassDiscriminator.train_on_batch(
@@ -842,6 +865,7 @@ def wasserstein_loss(x,y):
train_outputs_domain = [0,0]




ptArray = np.hstack(
(ptArray, train_batch_value["globalvars"][:, 0]))
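Note: the branches above implement a per-epoch warm-up, and the commit's change here is to replace the [0.,0.] placeholders with test_on_batch calls so the inactive branch's loss is still tracked. The first branch's condition is hidden by the fold, but from the elif it is presumably epoch 0, steps up to 30. Under that assumption the schedule is, as a sketch:

    def training_phase(epoch, step):
        if epoch == 0 and step <= 30:            # assumed hidden condition
            return "train class branch, only evaluate domain discriminator"
        elif (epoch == 0 and 30 < step <= 60) or (epoch > 0 and step <= 30):
            return "train domain discriminator on frozen features, evaluate class branch"
        else:
            return "train both together via the fused discriminator"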
@@ -952,7 +976,7 @@ def wasserstein_loss(x,y):
continue

if isParametric:
- test_inputs = [test_batch_value['gen'][:, 0],
+ test_inputs = [test_batch_value['gen'],
test_batch_value['globalvars'],
test_batch_value['cpf'],
test_batch_value['npf'],
@@ -966,7 +990,6 @@ def wasserstein_loss(x,y):
test_outputs = modelClassDiscriminator.test_on_batch(test_inputs, test_batch_value["truth"])
test_prediction = modelClassDiscriminator.predict_on_batch(test_inputs)


#print train_batch_value_domain["isData"][:10]
#print train_batch_value_domain["xsecweight"][:10]

@@ -1068,6 +1091,11 @@ def wasserstein_loss(x,y):
else:
avgLoss_train_domain = total_loss_train_domain/nTrainDomain
avgLoss_test_domain = total_loss_test_domain/nTestDomain
+
+ avgLoss_train_per_epoch.append(avgLoss_train)
+ avgLoss_test_per_epoch.append(avgLoss_test)
+ avgLoss_train_domain_per_epoch.append(avgLoss_train_domain)
+ avgLoss_test_domain_per_epoch.append(avgLoss_test_domain)

if epoch == 0:

@@ -1181,7 +1209,69 @@ def wasserstein_loss(x,y):
f = open(os.path.join(outputFolder, "model_epoch.stat"), "a")
f.write(str(epoch)+";"+str(learning_rate_val)+";"+str(avgLoss_train)+";"+str(avgLoss_test)+";"+str(avgLoss_train_domain)+";"+str(avgLoss_test_domain)+";"+str(M_score)+"\n")
f.close()

+
+ cv = ROOT.TCanvas("cv"+str(idis)+str(random.random()),"",800,750)
+ '''
+ cv.Divide(1,3,0,0)
+ cv.GetPad(1).SetPad(0.0, 0.0, 1.0, 1.0)
+ cv.GetPad(2).SetPad(0.0, 0.0, 1.0, 1.0)
+ cv.GetPad(3).SetPad(0.0, 0.0, 1.0, 1.0)
+ cv.GetPad(1).SetFillStyle(4000)
+ cv.GetPad(2).SetFillStyle(4000)
+ cv.GetPad(3).SetFillStyle(4000)
+ cv.GetPad(1).SetMargin(0.135, 0.04, 0.6, 0.06)
+ cv.GetPad(2).SetMargin(0.135, 0.04, 0.27, 0.42)
+ cv.GetPad(3).SetMargin(0.135, 0.04, 0.15, 0.75)
+ #cv.GetPad(1).SetLogy(1)
+ cv.GetPad(2).SetLogy(1)
+ cv.GetPad(3).SetLogy(1)
+ cv.cd(1)
+ '''
+ cv.SetMargin(0.135, 0.04, 0.13, 0.04)
+ epocharray = np.linspace(1,len(lr_per_epoch),len(lr_per_epoch))
+ axis1 = ROOT.TH2F("axis1"+str(random.random()),";Epoch;Loss",
+     50,0,len(lr_per_epoch)+1,
+     50,
+     0.85*min(avgLoss_train_per_epoch+avgLoss_test_per_epoch+avgLoss_train_domain_per_epoch+avgLoss_test_domain_per_epoch),
+     1.15*max(avgLoss_train_per_epoch+avgLoss_test_per_epoch+avgLoss_train_domain_per_epoch+avgLoss_test_domain_per_epoch)
+ )
+ axis1.GetXaxis().SetTickLength(0.015/(1-cv.GetLeftMargin()-cv.GetRightMargin()))
+ axis1.GetYaxis().SetTickLength(0.015/(1-cv.GetTopMargin()-cv.GetBottomMargin()))
+ axis1.Draw("AXIS")
+
+ g_train_class = ROOT.TGraph(len(epocharray),epocharray,np.array(avgLoss_train_per_epoch))
+ g_train_class.SetLineWidth(2)
+ g_train_class.SetLineColor(ROOT.kAzure-4)
+ g_train_class.SetMarkerColor(ROOT.kAzure-4)
+ g_train_class.SetMarkerSize(1.2)
+ g_train_class.Draw("PL")
+ g_test_class = ROOT.TGraph(len(epocharray),epocharray,np.array(avgLoss_test_per_epoch))
+ g_test_class.SetLineWidth(4)
+ g_test_class.SetLineStyle(2)
+ g_test_class.SetLineColor(ROOT.kBlue)
+ g_test_class.Draw("L")
+ g_train_domain = ROOT.TGraph(len(epocharray),epocharray,np.array(avgLoss_train_domain_per_epoch))
+ g_train_domain.SetLineWidth(2)
+ g_train_domain.SetLineColor(ROOT.kOrange+7)
+ g_train_domain.SetMarkerColor(ROOT.kOrange+7)
+ g_train_domain.SetMarkerSize(1.2)
+ g_train_domain.Draw("PL")
+ g_test_domain = ROOT.TGraph(len(epocharray),epocharray,np.array(avgLoss_test_domain_per_epoch))
+ g_test_domain.SetLineWidth(4)
+ g_test_domain.SetLineStyle(2)
+ g_test_domain.SetLineColor(ROOT.kRed+1)
+ g_test_domain.Draw("L")
+ cv.Print(os.path.join(outputFolder,"epoch_" + str(epoch),"loss.pdf"))
+ '''
+ lr_per_epoch = []
+ class_weight_per_epoch = []
+ domain_weight_per_epoch = []
+ avgLoss_train_per_epoch = []
+ avgLoss_test_per_epoch = []
+ avgLoss_train_domain_per_epoch = []
+ avgLoss_test_domain_per_epoch = []
+ '''
if epoch > 1 and previous_train_loss < avgLoss_train:
learning_rate_val = learning_rate_val*0.85
print "Decreasing learning rate to %.4e" % (learning_rate_val)