diff --git a/Training/nominal_model.py b/Training/nominal_model.py
index 04452c6..156fd50 100644
--- a/Training/nominal_model.py
+++ b/Training/nominal_model.py
@@ -92,7 +92,7 @@ def __init__(self,nclasses,isParametric=False,useLSTM=True,useWasserstein=False,
         self.useWasserstein = useWasserstein
         with tf.variable_scope("cpf_conv"):
             self.cpf_conv = Sequence(scope='cpf_conv')
-            self.cpf_conv.add(keras.layers.BatchNormalization())
+            #self.cpf_conv.add(keras.layers.BatchNormalization())
             self.cpf_conv.add(Conv(64,1,1,options=options,name="cpf_conv1"))
             self.cpf_conv.add(Conv(32,1,1,options=options,name="cpf_conv2"))
             self.cpf_conv.add(Conv(32,1,1,options=options,name="cpf_conv3"))
@@ -100,7 +100,7 @@ def __init__(self,nclasses,isParametric=False,useLSTM=True,useWasserstein=False,
         with tf.variable_scope("npf_conv"):
             self.npf_conv = Sequence(scope='npf_conv')
-            self.npf_conv.add(keras.layers.BatchNormalization())
+            #self.npf_conv.add(keras.layers.BatchNormalization())
             self.npf_conv.add(Conv(32,1,1,options=options,name="npf_conv1"))
             self.npf_conv.add(Conv(16,1,1,options=options,name="npf_conv2"))
             self.npf_conv.add(Conv(16,1,1,options=options,name="npf_conv3"))
@@ -108,7 +108,7 @@ def __init__(self,nclasses,isParametric=False,useLSTM=True,useWasserstein=False,
         with tf.variable_scope("sv_conv"):
             self.sv_conv = Sequence(scope='sv_conv')
-            self.sv_conv.add(keras.layers.BatchNormalization())
+            #self.sv_conv.add(keras.layers.BatchNormalization())
             self.sv_conv.add(Conv(32,1,1,options=options,name="sv_conv1"))
             self.sv_conv.add(Conv(16,1,1,options=options,name="sv_conv2"))
             self.sv_conv.add(Conv(16,1,1,options=options,name="sv_conv3"))
@@ -128,7 +128,7 @@ def __init__(self,nclasses,isParametric=False,useLSTM=True,useWasserstein=False,
         self.global_norm = keras.layers.BatchNormalization()
         self.full_features = Sequence(scope='features')
         self.full_features.add(keras.layers.Concatenate())
-        self.full_features.add(Dense(200,options=options,name="features1"))
+        self.full_features.add(Dense(200,options=options,name="features1",activation=None))
         self.full_features.add(keras.layers.Activation('tanh',name="features2"))
         #self.full_features.add(keras.layers.GaussianNoise(0.1))
         '''
@@ -158,7 +158,7 @@ def gradientReverse(x):
            else:
                backward = tf.negative(x*tf.exp(tf.abs(x)))
            forward = tf.identity(x)
-           return backward + tf.stop_gradient(forward - backward)
+           return (backward + tf.stop_gradient(forward - backward))
        self.domain_prediction = Sequence(scope='domain_prediction')
        self.domain_prediction.add(keras.layers.Lambda(gradientReverse))
@@ -185,9 +185,9 @@ def extractFeatures(self,globalvars,cpf,npf,sv,gen=None):
        globalvars_norm = self.global_norm(globalvars)
        if self.isParametric:
-           full_features = self.full_features([globalvars_norm,gen,cpf_lstm,npf_lstm,sv_lstm])
+           full_features = self.full_features([globalvars,gen,cpf_lstm,npf_lstm,sv_lstm])
        else:
-           full_features = self.full_features([globalvars_norm,cpf_lstm,npf_lstm,sv_lstm])
+           full_features = self.full_features([globalvars,cpf_lstm,npf_lstm,sv_lstm])
        return full_features
diff --git a/Training/training.py b/Training/training.py
index 1a0025b..4df9a21 100644
--- a/Training/training.py
+++ b/Training/training.py
@@ -564,6 +564,18 @@ def random_ctau(start,end,v):
     h = ((h >> 16) ^ h) * 0x45d9f3b
     h = (h >> 16) ^ h
     return start+((17+h+h/100+h/10000)%(end-start))
+
+
+lr_per_epoch = []
+class_weight_per_epoch = []
+domain_weight_per_epoch = []
+
+avgLoss_train_per_epoch = []
+avgLoss_test_per_epoch = []
+avgLoss_train_domain_per_epoch = []
+avgLoss_test_domain_per_epoch = []
+
+
 while (epoch < num_epochs):
@@ -595,7 +607,8 @@ def random_ctau(start,end,v):
     #modelTest = setupModelDiscriminator()
     classLossWeight = 1.
-    domainLossWeight = max(0,epoch-2)/50.+(max(0,epoch-2)/75.)**2. #0.7-0.7*math.exp(-0.03*max(0,epoch-2)**1.5)+0.05*max(0,epoch-2)
+    domainLossWeight = max(0,epoch-2)/25.+(max(0,epoch-2)/50.)**2. #0.7-0.7*math.exp(-0.03*max(0,epoch-2)**1.5)+0.05*max(0,epoch-2)
+    #domainLossWeight = max(0,epoch-2)/25.+(max(0,epoch-2)/25.)**2.
     #classLossWeight = 0.3+0.7*math.exp(-0.03*max(0,epoch-2)**1.5)
     #since learning rate is decreased increase DA weight at higher epochs
@@ -605,6 +618,11 @@ def random_ctau(start,end,v):
         classLossWeight = 1.
         domainLossWeight = 0
+    lr_per_epoch.append(learning_rate_val)
+    class_weight_per_epoch.append(classLossWeight)
+    domain_weight_per_epoch.append(domainLossWeight)
+
+
     def wasserstein_loss(x,y):
         return K.mean(x*y)
@@ -621,7 +639,7 @@ def wasserstein_loss(x,y):
     optClass = keras.optimizers.Adam(lr=learning_rate_val, beta_1=0.9, beta_2=0.999)
     modelClassDiscriminator.compile(optClass,
         loss=classLossFctType, metrics=['accuracy'],
-        loss_weights=[classLossWeight])
+        loss_weights=[1.])
     classLossFct = modelClassDiscriminator.total_loss #includes also regularization loss
     classInputGradients = tf.gradients(classLossFct,modelClassDiscriminator.inputs)
@@ -630,7 +648,7 @@ def wasserstein_loss(x,y):
     optDomain = keras.optimizers.Adam(lr=learning_rate_val, beta_1=0.9, beta_2=0.999)
     modelDomainDiscriminator.compile(optDomain,
         loss=domainLossFctType, metrics=['accuracy'],
-        loss_weights=[domainLossWeight])
+        loss_weights=[1.])
     domainLossFct = modelDomainDiscriminator.total_loss #includes also regularization loss
     domainInputGradients = tf.gradients(domainLossFct,modelDomainDiscriminator.inputs)
@@ -649,7 +667,7 @@ def wasserstein_loss(x,y):
     optDomainFrozen = keras.optimizers.Adam(lr=learning_rate_val, beta_1=0.9, beta_2=0.999)
     modelDomainDiscriminatorFrozen.compile(optDomainFrozen,
         loss=domainLossFctType, metrics=['accuracy'],
-        loss_weights=[domainLossWeight])
+        loss_weights=[1.])
     if epoch == 0:
         print "class network"
@@ -695,6 +713,10 @@ def wasserstein_loss(x,y):
     total_loss_train_domain = 0
     total_loss_test_domain = 0
+
+
+
+
     start_time = time.time()
@@ -718,7 +740,7 @@ def wasserstein_loss(x,y):
                 continue
             if isParametric:
-                train_inputs_class = [train_batch_value['gen'][:, 0:1],
+                train_inputs_class = [train_batch_value['gen'],
                                       train_batch_value['globalvars'],
                                       train_batch_value['cpf'],
                                       train_batch_value['npf'],
@@ -752,24 +774,14 @@ def wasserstein_loss(x,y):
                    train_inputs_class[igrad]+=direction*classInputGradientsVal[igrad]
            if not noDA:
-               '''
-               train_batch_value_domain_1 = sess.run(train_batch_da)
-               train_batch_value_domain_2 = sess.run(train_batch_da)
-               if train_batch_value_domain_1['num'].shape[0]==0 or train_batch_value_domain_2['num'].shape[0]==0:
-                   continue
-               train_batch_value_domain = {}
-
-               iterda = np.random.normal(0,0.1)
-               for k in train_batch_value_domain_1.keys():
-                   train_batch_value_domain[k] = train_batch_value_domain_1[k]+iterda*(train_batch_value_domain_2[k]-train_batch_value_domain_1[k])
-               '''
+               train_batch_value_domain = sess.run(train_batch_da)
                #ctau = np.random.uniform(-2,5,size=(train_batch_value_domain.shape[0],1))
                if isParametric:
                    train_inputs_domain = [
                        #ctau,
-                       train_batch_value['gen'][:, 0:1], #use the SAME liftimes as in MC!!!
+                       train_batch_value['gen'], #use the SAME liftimes as in MC!!!
                        train_batch_value_domain['globalvars'],
                        train_batch_value_domain['cpf'],
                        train_batch_value_domain['npf'],
@@ -811,7 +823,13 @@ def wasserstein_loss(x,y):
                        train_inputs_class,
                        train_batch_value["truth"]
                    )
-                   train_outputs_domain = [0.,0.]
+                   train_outputs_domain = modelDomainDiscriminatorFrozen.test_on_batch(
+                       train_inputs_domain,
+                       (2.*train_batch_value_domain["isData"]-1) if useWasserstein else train_batch_value_domain["isData"],
+                       sample_weight=train_da_weight
+                   )
+
+
                elif (epoch==0 and (step>30 and step<=60)) or (epoch>0 and step<=30):
                    #train domain discriminator only while keeping features frozen
                    train_outputs_domain = modelDomainDiscriminatorFrozen.train_on_batch(
@@ -819,7 +837,12 @@ def wasserstein_loss(x,y):
                        train_inputs_domain,
                        (2.*train_batch_value_domain["isData"]-1) if useWasserstein else train_batch_value_domain["isData"],
                        sample_weight=train_da_weight
                    )
-                   train_outputs = [0.,0.]
+                   train_outputs = modelClassDiscriminator.test_on_batch(
+                       train_inputs_class,
+                       train_batch_value["truth"]
+                   )
+
+
                else:
                    #finally train both discriminators together
                    train_outputs_fused = modelFusedDiscriminator.train_on_batch(
@@ -832,7 +855,7 @@ def wasserstein_loss(x,y):
                    )
                    train_outputs = train_outputs_fused[1],train_outputs_fused[3]
                    train_outputs_domain = train_outputs_fused[2],train_outputs_fused[4]
-
+
            else:
                #train only class branch if noDA
                train_outputs = modelClassDiscriminator.train_on_batch(
@@ -842,6 +865,7 @@ def wasserstein_loss(x,y):
                train_outputs_domain = [0,0]
+
            ptArray = np.hstack(
                (ptArray, train_batch_value["globalvars"][:, 0]))
@@ -952,7 +976,7 @@ def wasserstein_loss(x,y):
                continue
            if isParametric:
-               test_inputs = [test_batch_value['gen'][:, 0],
+               test_inputs = [test_batch_value['gen'],
                               test_batch_value['globalvars'],
                               test_batch_value['cpf'],
                               test_batch_value['npf'],
@@ -966,7 +990,6 @@ def wasserstein_loss(x,y):
            test_outputs = modelClassDiscriminator.test_on_batch(test_inputs, test_batch_value["truth"])
            test_prediction = modelClassDiscriminator.predict_on_batch(test_inputs)
-
            #print train_batch_value_domain["isData"][:10]
            #print train_batch_value_domain["xsecweight"][:10]
@@ -1068,6 +1091,11 @@ def wasserstein_loss(x,y):
    else:
        avgLoss_train_domain = total_loss_train_domain/nTrainDomain
        avgLoss_test_domain = total_loss_test_domain/nTestDomain
+
+    avgLoss_train_per_epoch.append(avgLoss_train)
+    avgLoss_test_per_epoch.append(avgLoss_test)
+    avgLoss_train_domain_per_epoch.append(avgLoss_train_domain)
+    avgLoss_test_domain_per_epoch.append(avgLoss_test_domain)
    if epoch == 0:
@@ -1181,7 +1209,69 @@ def wasserstein_loss(x,y):
    f = open(os.path.join(outputFolder, "model_epoch.stat"), "a")
    f.write(str(epoch)+";"+str(learning_rate_val)+";"+str(avgLoss_train)+";"+str(avgLoss_test)+";"+str(avgLoss_train_domain)+";"+str(avgLoss_test_domain)+";"+str(M_score)+"\n")
    f.close()
-
+
+    cv = ROOT.TCanvas("cv"+str(idis)+str(random.random()),"",800,750)
+    '''
+    cv.Divide(1,3,0,0)
+    cv.GetPad(1).SetPad(0.0, 0.0, 1.0, 1.0)
+    cv.GetPad(2).SetPad(0.0, 0.0, 1.0, 1.0)
+    cv.GetPad(3).SetPad(0.0, 0.0, 1.0, 1.0)
+    cv.GetPad(1).SetFillStyle(4000)
+    cv.GetPad(2).SetFillStyle(4000)
+    cv.GetPad(3).SetFillStyle(4000)
+    cv.GetPad(1).SetMargin(0.135, 0.04, 0.6, 0.06)
+    cv.GetPad(2).SetMargin(0.135, 0.04, 0.27, 0.42)
+    cv.GetPad(3).SetMargin(0.135, 0.04, 0.15, 0.75)
+    #cv.GetPad(1).SetLogy(1)
+    cv.GetPad(2).SetLogy(1)
+    cv.GetPad(3).SetLogy(1)
+    cv.cd(1)
+    '''
+    cv.SetMargin(0.135, 0.04, 0.13, 0.04)
+    epocharray = np.linspace(1,len(lr_per_epoch),len(lr_per_epoch))
+    axis1 = ROOT.TH2F("axis1"+str(random.random()),";Epoch;Loss",
+        50,0,len(lr_per_epoch)+1,
+        50,
+        0.85*min(avgLoss_train_per_epoch+avgLoss_test_per_epoch+avgLoss_train_domain_per_epoch+avgLoss_test_domain_per_epoch),
+        1.15*max(avgLoss_train_per_epoch+avgLoss_test_per_epoch+avgLoss_train_domain_per_epoch+avgLoss_test_domain_per_epoch)
+    )
+    axis1.GetXaxis().SetTickLength(0.015/(1-cv.GetLeftMargin()-cv.GetRightMargin()))
+    axis1.GetYaxis().SetTickLength(0.015/(1-cv.GetTopMargin()-cv.GetBottomMargin()))
+    axis1.Draw("AXIS")
+
+    g_train_class = ROOT.TGraph(len(epocharray),epocharray,np.array(avgLoss_train_per_epoch))
+    g_train_class.SetLineWidth(2)
+    g_train_class.SetLineColor(ROOT.kAzure-4)
+    g_train_class.SetMarkerColor(ROOT.kAzure-4)
+    g_train_class.SetMarkerSize(1.2)
+    g_train_class.Draw("PL")
+    g_test_class = ROOT.TGraph(len(epocharray),epocharray,np.array(avgLoss_test_per_epoch))
+    g_test_class.SetLineWidth(4)
+    g_test_class.SetLineStyle(2)
+    g_test_class.SetLineColor(ROOT.kBlue)
+    g_test_class.Draw("L")
+    g_train_domain = ROOT.TGraph(len(epocharray),epocharray,np.array(avgLoss_train_domain_per_epoch))
+    g_train_domain.SetLineWidth(2)
+    g_train_domain.SetLineColor(ROOT.kOrange+7)
+    g_train_domain.SetMarkerColor(ROOT.kOrange+7)
+    g_train_domain.SetMarkerSize(1.2)
+    g_train_domain.Draw("PL")
+    g_test_domain = ROOT.TGraph(len(epocharray),epocharray,np.array(avgLoss_test_domain_per_epoch))
+    g_test_domain.SetLineWidth(4)
+    g_test_domain.SetLineStyle(2)
+    g_test_domain.SetLineColor(ROOT.kRed+1)
+    g_test_domain.Draw("L")
+    cv.Print(os.path.join(outputFolder,"epoch_" + str(epoch),"loss.pdf"))
+    '''
+    lr_per_epoch = []
+    class_weight_per_epoch = []
+    domain_weight_per_epoch = []
+
+    avgLoss_train_per_epoch = []
+    avgLoss_test_per_epoch = []
+    avgLoss_train_domain_per_epoch = []
+    avgLoss_test_domain_per_epoch = []
+    '''
    if epoch > 1 and previous_train_loss < avgLoss_train:
        learning_rate_val = learning_rate_val*0.85
        print "Decreasing learning rate to %.4e" % (learning_rate_val)
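
Note on the gradientReverse change in nominal_model.py: the returned expression uses the stop-gradient identity trick, so the layer is an identity in the forward pass while only the reversed (and amplified) backward term carries gradients, because tf.stop_gradient excludes the (forward - backward) correction from backpropagation; the added parentheses do not change the computation. Below is a minimal standalone sketch of that construction, assuming TensorFlow 1.x-style usage as in this repository; the function name is illustrative and the useWasserstein branch of the original helper is omitted.

import tensorflow as tf

def gradient_reverse(x):
    # Reversed, amplified gradient path (mirrors the else-branch shown in the patch;
    # the useWasserstein variant of the original helper is not reproduced here).
    backward = tf.negative(x * tf.exp(tf.abs(x)))
    # Identity forward pass.
    forward = tf.identity(x)
    # Value equals forward(x); the gradient equals d(backward)/dx, because
    # tf.stop_gradient removes the (forward - backward) term from backpropagation.
    return backward + tf.stop_gradient(forward - backward)

For orientation on the schedule change in training.py, the new domainLossWeight = max(0,epoch-2)/25. + (max(0,epoch-2)/50.)**2. ramps up roughly twice as fast as the previous /50. and /75. version: at epoch 12 it evaluates to 0.4 + 0.04 = 0.44, compared with about 0.2 + 0.018 = 0.22 before.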