From c319d8398aec70cdafb3f54c1fea7704ac1138bd Mon Sep 17 00:00:00 2001 From: simonge Date: Tue, 7 May 2024 19:19:04 +0100 Subject: [PATCH] Updated to work locally at least --- benchmarks/LOWQ2/config.yml | 6 +- benchmarks/LOWQ2/training/Snakefile | 108 +++++++++++++++--- .../LOWQ2/training/TaggerRegressionEICrecon.C | 19 +-- 3 files changed, 103 insertions(+), 30 deletions(-) diff --git a/benchmarks/LOWQ2/config.yml b/benchmarks/LOWQ2/config.yml index c3a7876c..3382ffa0 100644 --- a/benchmarks/LOWQ2/config.yml +++ b/benchmarks/LOWQ2/config.yml @@ -1,5 +1,5 @@ +# Run Snakemake for the training training: - # Training configuration goes here + + -analysis: - # Analysis configuration goes here diff --git a/benchmarks/LOWQ2/training/Snakefile b/benchmarks/LOWQ2/training/Snakefile index 7788e862..13e7a3ca 100644 --- a/benchmarks/LOWQ2/training/Snakefile +++ b/benchmarks/LOWQ2/training/Snakefile @@ -11,47 +11,117 @@ S3 = S3RemoteProvider( secret_access_key=os.environ["S3_SECRET_KEY"], ) +EVENTS_DIRECTORY = "/scratch/EIC/Events/S3in/" +SIM_DIRECTORY = "/scratch/EIC/G4out/S3processed/" +RECO_DIRECTORY = "/scratch/EIC/ReconOut/S3in/" +RECO_DIRECTORY_P = "/scratch/EIC/ReconOut/S3processed/" +MODEL_DIRECTORY = "/scratch/EIC/LowQ2Model/" -SIM_DIRECTORY = "/scratch/EIC/G4out/S3/" -RECON_DIRECTORY = "/scratch/EIC/ReconOut/S3/" -MODEL_DIRECTORY = "/scratch/EIC/LowQ2Model/" -REMOTE_DIRECTORY = "eictest/EPIC/EVGEN/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/" -FILE_BASE = "pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run" -XML_FILE = "/home/simong/EIC/epic/epic_18x275.xml" +REMOTE_EVENTS_DIRECTORY = "EPIC/EVGEN/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/" +REMOTE_RECO_DIRECTORY = "EPIC/RECO/24.02.1/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/" +S3_DIR = "eictest/" +XROOTD_SERVER = "root://dtn-eic.jlab.org/" +XROOTD_DIR = "/work/eic2/" -rule download_input: +EVENT_EXTENSION = ".ab.hepmc3.tree.root" +SIM_EXTENSION = ".edm4hep.root" +RECO_EXTENSION = ".eicrecon.tree.edm4eic.root" + +FILE_BASE = "pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run" +XML_FILE = "/home/simong/EIC/epic/install/share/epic/epic_craterlake_10x100.xml" +XML_FILE_TAGGER = "/home/simong/EIC/epic/install/share/epic/epic_ip6.xml" +XML_FILE_EDIT = "/home/simong/EIC/epic/epic_edit.xml" +BEAM_ENERGY = "10" + +rule download_event_input: input: - S3.remote(REMOTE_DIRECTORY+FILE_BASE+"{index}"+EVENT_EXTENSION), + S3.remote(REMOTE_EVENTS_DIRECTORY+FILE_BASE+"{index}"+EVENT_EXTENSION), output: EVENTS_DIRECTORY+FILE_BASE+"{index}"+EVENT_EXTENSION, run: shutil.move(input[0], output[0]) + +def remote_file_exists(server,url): + try: + subprocess.check_output(['xrdfs', server, 'stat', url]) + return True + except subprocess.CalledProcessError: + return False + +rule run_simulation_tagger: + params: + XML=XML_FILE_EDIT, + input=lambda wildcards: XROOTD_SERVER+XROOTD_DIR+REMOTE_EVENTS_DIRECTORY+FILE_BASE+wildcards.index+EVENT_EXTENSION if remote_file_exists(XROOTD_SERVER,XROOTD_DIR+REMOTE_EVENTS_DIRECTORY+FILE_BASE+wildcards.index+EVENT_EXTENSION) else None, + output: + SIM_DIRECTORY+FILE_BASE+"{index}_tagger"+SIM_EXTENSION, + shell: """ +npsim \ + --inputFiles {params.input} \ + --outputFile {output[0]} \ + --compactFile {params.XML} \ + --runType run \ + --numberOfEvents 100000 \ + --physics.list FTFP_BERT \ + --field.eps_min 5e-06 \ + --field.eps_max 1e-04 \ + --physics.rangecut 50 \ +""" + +rule download_recon: + input: + S3.remote(S3_DIR+REMOTE_RECO_DIRECTORY+FILE_BASE+"{runindex}"+".ab."+"{subindex}"+RECO_EXTENSION), + output: + RECO_DIRECTORY+FILE_BASE+"{runindex}"+".ab."+"{subindex}"+RECO_EXTENSION, + run: + shutil.move(input[0], output[0]) + rule run_reconstruction: + params: + XML=XML_FILE, + beam_energy=BEAM_ENERGY, + collections="TaggerTrackerProjectedTracks,MCScatteredElectrons,MCParticles,EventHeader", input: - SIM_DIRECTORY+FILE_BASE+"{index}"+SIM_EXTENSION, - "/home/simong/EIC/EICrecon/bin/eicrecon", + expand( + SIM_DIRECTORY+FILE_BASE+"{index}_tagger"+SIM_EXTENSION, + index=range(1,10), + ), + output: + RECO_DIRECTORY_P+FILE_BASE+".fab"+RECO_EXTENSION, + shell: """ +/home/simong/EIC/EICrecon/bin/eicrecon {input} -Pdd4hep:xml_files={params.XML} -Ppodio:output_include_collections={params.collections} -Ppodio:output_file={output[0]} -PLOWQ2:LowQ2Trajectories:electron_beamE={params.beam_energy} +""" + +rule run_reconstruction_remote: params: XML=XML_FILE, - collections="LowQ2Tracks,ScatteredElectron", + beam_energy=BEAM_ENERGY, + collections="TaggerTrackerProjectedTracks,MCScatteredElectrons,MCParticles,EventHeader", + stringValue=RECO_DIRECTORY+FILE_BASE+"*.ab.*"+RECO_EXTENSION, + input: + expand( + RECO_DIRECTORY+FILE_BASE+"{runindex}"+".ab."+"{subindex:04d}"+RECO_EXTENSION, + subindex=range(1,4), + runindex=1 + ), output: - RECON_DIRECTORY+FILE_BASE+"{index}_reco.{tag}"+SIM_EXTENSION, + RECO_DIRECTORY_P+FILE_BASE+".ab"+RECO_EXTENSION, shell: """ -/home/simong/EIC/EICrecon/bin/eicrecon {input[0]} -Pjana:nevents=400 -Pdd4hep:xml_files={params.XML} -Ppodio:output_include_collections={params.collections} -Ppodio:output_file={output[0]} -PLOWQ2:LowQ2Trajectories:electron_beamE=18 +/home/simong/EIC/EICrecon/bin/eicrecon {input} -Pdd4hep:xml_files={params.XML} -Ppodio:output_include_collections={params.collections} -Ppodio:output_file={output[0]} -PLOWQ2:LowQ2Trajectories:electron_beamE={params.beam_energy} """ rule low_q2_train_network: params: - beam_energy="18", + beam_energy=BEAM_ENERGY, type_name="LowQ2MomentumRegression", - method_name="DNN" + method_name="DNN", + model_dir="LowQ2Model" input: - train_data=RECON_DIRECTORY+FILE_BASE+"{index}_reco.{tag}"+SIM_EXTENSION + train_data=RECO_DIRECTORY_P+FILE_BASE+".fab"+RECO_EXTENSION, output: - root_output=MODEL_DIRECTORY+trainedData.root", - model_output=MODEL_DIRECTORY+"weights/" + root_output=MODEL_DIRECTORY+"trainedData.root", shell: """ - root -l -b -q 'TaggerRegressionEICrecon.C+("{input.train_data}", "{output.root_output}", MODEL_DIRECTORY, "{params.beam_energy}", "{params.type_name}", "{params.method_name}")' + root -l -b -q 'TaggerRegressionEICrecon.C++("{input.train_data}", "{output.root_output}", "{params.model_dir}", "{params.beam_energy}", "{params.type_name}", "{params.method_name}")' """ diff --git a/benchmarks/LOWQ2/training/TaggerRegressionEICrecon.C b/benchmarks/LOWQ2/training/TaggerRegressionEICrecon.C index 9d02190a..fc2226f2 100644 --- a/benchmarks/LOWQ2/training/TaggerRegressionEICrecon.C +++ b/benchmarks/LOWQ2/training/TaggerRegressionEICrecon.C @@ -27,7 +27,7 @@ using namespace TMVA; void TaggerRegressionEICrecon( TString inDataNames = "/scratch/EIC/ReconOut/qr_18x275_ab/qr_18x275_ab*_recon.edm4hep.root", TString outDataName = "/scratch/EIC/LowQ2Model/trainedData.root", - TString dataFolderName = "/scratch/EIC/LowQ2Model/", + TString dataFolderName = "LowQ2Model", TString mcBeamEnergy = "18", TString typeName = "LowQ2MomentumRegression", TString methodName = "DNN", @@ -57,7 +57,7 @@ void TaggerRegressionEICrecon( TMVA::DataLoader *dataloader=new TMVA::DataLoader(dataFolderName); // Input TrackParameters variables from EICrecon - - TString collectionName = "LowQ2Tracks[0]"; + TString collectionName = "TaggerTrackerProjectedTracks[0]"; dataloader->AddVariable( collectionName+".loc.a", "fit_position_y", "units", 'F' ); dataloader->AddVariable( collectionName+".loc.b", "fit_position_z", "units", 'F' ); dataloader->AddVariable( "sin("+collectionName+".phi)*sin("+collectionName+".theta)", "fit_vector_x", "units", 'F' ); @@ -66,7 +66,8 @@ void TaggerRegressionEICrecon( // Regression target particle 3-momentum, normalised to beam energy. // Takes second particle, in the test data this is the scattered electron // TODO add energy and array element information to be read directly from datafile - EMD4eic and EICrecon changes. - TString mcParticleName = "ScatteredElectron[0]"; + TString mcParticleName = "MCParticles[MCScatteredElectrons_objIdx[0].index]"; + //TString mcParticleName = "MCParticles[0]"; dataloader->AddTarget( mcParticleName+".momentum.x/"+mcBeamEnergy ); dataloader->AddTarget( mcParticleName+".momentum.y/"+mcBeamEnergy ); dataloader->AddTarget( mcParticleName+".momentum.z/"+mcBeamEnergy ); @@ -88,7 +89,7 @@ void TaggerRegressionEICrecon( // expression need to exist in the original TTree) // dataloader->SetWeightExpression( "1/(eE)", "Regression" ); // If MC event weights are kept use these // Apply additional cuts on the data - TCut mycut = "@LowQ2Tracks.size()==1"; // Make sure there's one reconstructed track in event + TCut mycut = "@TaggerTrackerProjectedTracks.size()==1"; // Make sure there's one reconstructed track in event dataloader->PrepareTrainingAndTestTree(mycut,"nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:SplitSeed=1:NormMode=NumEvents:!V"); @@ -96,12 +97,14 @@ void TaggerRegressionEICrecon( TString layoutString("Layout=TANH|1024,TANH|128,TANH|64,TANH|32,LINEAR"); TString trainingStrategyString("TrainingStrategy="); - trainingStrategyString +="LearningRate=1e-4,Momentum=0,MaxEpochs=2000,ConvergenceSteps=200,BatchSize=64,TestRepetitions=1,Regularization=None,Optimizer=ADAM"; + //trainingStrategyString +="LearningRate=1e-4,Momentum=0,MaxEpochs=2000,ConvergenceSteps=200,BatchSize=64,TestRepetitions=1,Regularization=None,Optimizer=ADAM"; + trainingStrategyString +="LearningRate=1e-4,Momentum=0,MaxEpochs=2000,ConvergenceSteps=200,BatchSize=10,TestRepetitions=1,Regularization=None,Optimizer=ADAM"; TString nnOptions("!H:V:ErrorStrategy=SUMOFSQUARES:WeightInitialization=XAVIERUNIFORM:RandomSeed=1234"); // Use GPU if possible on the machine - TString architectureString("Architecture=GPU"); + //TString architectureString("Architecture=GPU"); + TString architectureString("Architecture=CPU"); // Transformation of data prior to training layers - decorrelate and normalise whole dataset TString transformString("VarTransform=D,N"); @@ -146,7 +149,7 @@ void TaggerRegressionEICrecon( std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVARegression is done!" << std::endl; - delete factory; - delete dataloader; + // delete factory; + // delete dataloader; }