Skip to content

Commit

Permalink
Updated to work locally at least
Browse files Browse the repository at this point in the history
  • Loading branch information
simonge committed May 7, 2024
1 parent 067f01c commit c319d83
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 30 deletions.
6 changes: 3 additions & 3 deletions benchmarks/LOWQ2/config.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Run Snakemake for the training
training:
# Training configuration goes here



analysis:
# Analysis configuration goes here
108 changes: 89 additions & 19 deletions benchmarks/LOWQ2/training/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,47 +11,117 @@ S3 = S3RemoteProvider(
secret_access_key=os.environ["S3_SECRET_KEY"],
)

EVENTS_DIRECTORY = "/scratch/EIC/Events/S3in/"
SIM_DIRECTORY = "/scratch/EIC/G4out/S3processed/"
RECO_DIRECTORY = "/scratch/EIC/ReconOut/S3in/"
RECO_DIRECTORY_P = "/scratch/EIC/ReconOut/S3processed/"
MODEL_DIRECTORY = "/scratch/EIC/LowQ2Model/"

SIM_DIRECTORY = "/scratch/EIC/G4out/S3/"
RECON_DIRECTORY = "/scratch/EIC/ReconOut/S3/"
MODEL_DIRECTORY = "/scratch/EIC/LowQ2Model/"
REMOTE_DIRECTORY = "eictest/EPIC/EVGEN/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/"
FILE_BASE = "pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run"
XML_FILE = "/home/simong/EIC/epic/epic_18x275.xml"
REMOTE_EVENTS_DIRECTORY = "EPIC/EVGEN/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/"
REMOTE_RECO_DIRECTORY = "EPIC/RECO/24.02.1/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/"
S3_DIR = "eictest/"
XROOTD_SERVER = "root://dtn-eic.jlab.org/"
XROOTD_DIR = "/work/eic2/"

rule download_input:
EVENT_EXTENSION = ".ab.hepmc3.tree.root"
SIM_EXTENSION = ".edm4hep.root"
RECO_EXTENSION = ".eicrecon.tree.edm4eic.root"

FILE_BASE = "pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run"
XML_FILE = "/home/simong/EIC/epic/install/share/epic/epic_craterlake_10x100.xml"
XML_FILE_TAGGER = "/home/simong/EIC/epic/install/share/epic/epic_ip6.xml"
XML_FILE_EDIT = "/home/simong/EIC/epic/epic_edit.xml"
BEAM_ENERGY = "10"

rule download_event_input:
input:
S3.remote(REMOTE_DIRECTORY+FILE_BASE+"{index}"+EVENT_EXTENSION),
S3.remote(REMOTE_EVENTS_DIRECTORY+FILE_BASE+"{index}"+EVENT_EXTENSION),
output:
EVENTS_DIRECTORY+FILE_BASE+"{index}"+EVENT_EXTENSION,
run:
shutil.move(input[0], output[0])


def remote_file_exists(server, url, timeout=60):
    """Report whether ``xrdfs <server> stat <url>`` succeeds.

    Args:
        server: XRootD server address handed to ``xrdfs``.
        url: Path on that server to stat.
        timeout: Seconds to wait for ``xrdfs`` before giving up. Pass
            ``None`` to wait without limit.

    Returns:
        True when the command exits with status 0. False when it exits with
        a non-zero status or does not finish within ``timeout``.

    Raises:
        OSError: if the ``xrdfs`` executable itself cannot be started. This
            is left to propagate so that a broken environment is not
            mistaken for a missing file.
    """
    try:
        subprocess.check_output(
            ['xrdfs', server, 'stat', url],
            # A failing stat is an expected outcome here, so keep the
            # command's diagnostics out of the workflow log.
            stderr=subprocess.DEVNULL,
            # This probe is called from a rule parameter function; bound it
            # so an unreachable server cannot stall the workflow forever.
            timeout=timeout,
        )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
        return False
    return True

# Run npsim for one event file ({index}) and write the corresponding
# "_tagger" simulation file into SIM_DIRECTORY.
# The event file is addressed by its XRootD URL and supplied through `params`
# rather than as a Snakemake `input`.
rule run_simulation_tagger:
params:
XML=XML_FILE_EDIT,
# Full XRootD URL of the event file, or None when remote_file_exists() reports
# that the server has no such file.
# NOTE(review): a None value would presumably reach the npsim command line as
# the text "None" - confirm how a missing remote file should be handled.
input=lambda wildcards: XROOTD_SERVER+XROOTD_DIR+REMOTE_EVENTS_DIRECTORY+FILE_BASE+wildcards.index+EVENT_EXTENSION if remote_file_exists(XROOTD_SERVER,XROOTD_DIR+REMOTE_EVENTS_DIRECTORY+FILE_BASE+wildcards.index+EVENT_EXTENSION) else None,
output:
SIM_DIRECTORY+FILE_BASE+"{index}_tagger"+SIM_EXTENSION,
shell: """
npsim \
--inputFiles {params.input} \
--outputFile {output[0]} \
--compactFile {params.XML} \
--runType run \
--numberOfEvents 100000 \
--physics.list FTFP_BERT \
--field.eps_min 5e-06 \
--field.eps_max 1e-04 \
--physics.rangecut 50 \
"""

# Fetch one reconstructed file, identified by {runindex} and {subindex}, from
# the S3 remote and move the downloaded copy to its place in RECO_DIRECTORY.
rule download_recon:
input:
S3.remote(S3_DIR+REMOTE_RECO_DIRECTORY+FILE_BASE+"{runindex}"+".ab."+"{subindex}"+RECO_EXTENSION),
output:
RECO_DIRECTORY+FILE_BASE+"{runindex}"+".ab."+"{subindex}"+RECO_EXTENSION,
run:
# input[0] is the local copy provided for the S3 remote file.
shutil.move(input[0], output[0])

rule run_reconstruction:
params:
XML=XML_FILE,
beam_energy=BEAM_ENERGY,
collections="TaggerTrackerProjectedTracks,MCScatteredElectrons,MCParticles,EventHeader",
input:
SIM_DIRECTORY+FILE_BASE+"{index}"+SIM_EXTENSION,
"/home/simong/EIC/EICrecon/bin/eicrecon",
expand(
SIM_DIRECTORY+FILE_BASE+"{index}_tagger"+SIM_EXTENSION,
index=range(1,10),
),
output:
RECO_DIRECTORY_P+FILE_BASE+".fab"+RECO_EXTENSION,
shell: """
/home/simong/EIC/EICrecon/bin/eicrecon {input} -Pdd4hep:xml_files={params.XML} -Ppodio:output_include_collections={params.collections} -Ppodio:output_file={output[0]} -PLOWQ2:LowQ2Trajectories:electron_beamE={params.beam_energy}
"""

rule run_reconstruction_remote:
params:
XML=XML_FILE,
collections="LowQ2Tracks,ScatteredElectron",
beam_energy=BEAM_ENERGY,
collections="TaggerTrackerProjectedTracks,MCScatteredElectrons,MCParticles,EventHeader",
stringValue=RECO_DIRECTORY+FILE_BASE+"*.ab.*"+RECO_EXTENSION,
input:
expand(
RECO_DIRECTORY+FILE_BASE+"{runindex}"+".ab."+"{subindex:04d}"+RECO_EXTENSION,
subindex=range(1,4),
runindex=1
),
output:
RECON_DIRECTORY+FILE_BASE+"{index}_reco.{tag}"+SIM_EXTENSION,
RECO_DIRECTORY_P+FILE_BASE+".ab"+RECO_EXTENSION,
shell: """
/home/simong/EIC/EICrecon/bin/eicrecon {input[0]} -Pjana:nevents=400 -Pdd4hep:xml_files={params.XML} -Ppodio:output_include_collections={params.collections} -Ppodio:output_file={output[0]} -PLOWQ2:LowQ2Trajectories:electron_beamE=18
/home/simong/EIC/EICrecon/bin/eicrecon {input} -Pdd4hep:xml_files={params.XML} -Ppodio:output_include_collections={params.collections} -Ppodio:output_file={output[0]} -PLOWQ2:LowQ2Trajectories:electron_beamE={params.beam_energy}
"""

rule low_q2_train_network:
params:
beam_energy="18",
beam_energy=BEAM_ENERGY,
type_name="LowQ2MomentumRegression",
method_name="DNN"
method_name="DNN",
model_dir="LowQ2Model"
input:
train_data=RECON_DIRECTORY+FILE_BASE+"{index}_reco.{tag}"+SIM_EXTENSION
train_data=RECO_DIRECTORY_P+FILE_BASE+".fab"+RECO_EXTENSION,
output:
root_output=MODEL_DIRECTORY+trainedData.root",
model_output=MODEL_DIRECTORY+"weights/"
root_output=MODEL_DIRECTORY+"trainedData.root",
shell:
"""
root -l -b -q 'TaggerRegressionEICrecon.C+("{input.train_data}", "{output.root_output}", MODEL_DIRECTORY, "{params.beam_energy}", "{params.type_name}", "{params.method_name}")'
root -l -b -q 'TaggerRegressionEICrecon.C++("{input.train_data}", "{output.root_output}", "{params.model_dir}", "{params.beam_energy}", "{params.type_name}", "{params.method_name}")'
"""

19 changes: 11 additions & 8 deletions benchmarks/LOWQ2/training/TaggerRegressionEICrecon.C
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ using namespace TMVA;
void TaggerRegressionEICrecon(
TString inDataNames = "/scratch/EIC/ReconOut/qr_18x275_ab/qr_18x275_ab*_recon.edm4hep.root",
TString outDataName = "/scratch/EIC/LowQ2Model/trainedData.root",
TString dataFolderName = "/scratch/EIC/LowQ2Model/",
TString dataFolderName = "LowQ2Model",
TString mcBeamEnergy = "18",
TString typeName = "LowQ2MomentumRegression",
TString methodName = "DNN",
Expand Down Expand Up @@ -57,7 +57,7 @@ void TaggerRegressionEICrecon(
TMVA::DataLoader *dataloader=new TMVA::DataLoader(dataFolderName);

// Input TrackParameters variables from EICrecon -
TString collectionName = "LowQ2Tracks[0]";
TString collectionName = "TaggerTrackerProjectedTracks[0]";
dataloader->AddVariable( collectionName+".loc.a", "fit_position_y", "units", 'F' );
dataloader->AddVariable( collectionName+".loc.b", "fit_position_z", "units", 'F' );
dataloader->AddVariable( "sin("+collectionName+".phi)*sin("+collectionName+".theta)", "fit_vector_x", "units", 'F' );
Expand All @@ -66,7 +66,8 @@ void TaggerRegressionEICrecon(
// Regression target particle 3-momentum, normalised to beam energy.
// Takes second particle, in the test data this is the scattered electron
// TODO add energy and array element information to be read directly from datafile - EDM4eic and EICrecon changes.
TString mcParticleName = "ScatteredElectron[0]";
TString mcParticleName = "MCParticles[MCScatteredElectrons_objIdx[0].index]";
//TString mcParticleName = "MCParticles[0]";
dataloader->AddTarget( mcParticleName+".momentum.x/"+mcBeamEnergy );
dataloader->AddTarget( mcParticleName+".momentum.y/"+mcBeamEnergy );
dataloader->AddTarget( mcParticleName+".momentum.z/"+mcBeamEnergy );
Expand All @@ -88,20 +89,22 @@ void TaggerRegressionEICrecon(
// expression need to exist in the original TTree)
// dataloader->SetWeightExpression( "1/(eE)", "Regression" ); // If MC event weights are kept use these
// Apply additional cuts on the data
TCut mycut = "@LowQ2Tracks.size()==1"; // Make sure there's one reconstructed track in event
TCut mycut = "@TaggerTrackerProjectedTracks.size()==1"; // Make sure there's one reconstructed track in event

dataloader->PrepareTrainingAndTestTree(mycut,"nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:SplitSeed=1:NormMode=NumEvents:!V");

// TODO - Optimise layout and training more
TString layoutString("Layout=TANH|1024,TANH|128,TANH|64,TANH|32,LINEAR");

TString trainingStrategyString("TrainingStrategy=");
trainingStrategyString +="LearningRate=1e-4,Momentum=0,MaxEpochs=2000,ConvergenceSteps=200,BatchSize=64,TestRepetitions=1,Regularization=None,Optimizer=ADAM";
//trainingStrategyString +="LearningRate=1e-4,Momentum=0,MaxEpochs=2000,ConvergenceSteps=200,BatchSize=64,TestRepetitions=1,Regularization=None,Optimizer=ADAM";
trainingStrategyString +="LearningRate=1e-4,Momentum=0,MaxEpochs=2000,ConvergenceSteps=200,BatchSize=10,TestRepetitions=1,Regularization=None,Optimizer=ADAM";

TString nnOptions("!H:V:ErrorStrategy=SUMOFSQUARES:WeightInitialization=XAVIERUNIFORM:RandomSeed=1234");

// Use GPU if possible on the machine
TString architectureString("Architecture=GPU");
//TString architectureString("Architecture=GPU");
TString architectureString("Architecture=CPU");
// Transformation of data prior to training layers - decorrelate and normalise whole dataset
TString transformString("VarTransform=D,N");
Expand Down Expand Up @@ -146,7 +149,7 @@ void TaggerRegressionEICrecon(
std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
std::cout << "==> TMVARegression is done!" << std::endl;

delete factory;
delete dataloader;
// delete factory;
// delete dataloader;

}

0 comments on commit c319d83

Please sign in to comment.