From acbb6c562ea4fe870c733a2557cc16f7e58c6765 Mon Sep 17 00:00:00 2001
From: Sakib Rahman
Date: Tue, 30 Jan 2024 16:29:08 -0500
Subject: [PATCH] Use a nested directory approach, following
 https://waterdata.usgs.gov/blog/snakemake-for-ml-experiments/, to uniquely
 identify models instead of a hash. Establishes a DAG with greater
 parallelization of processes.

---
 benchmarks/roman_pots/Snakefile               | 312 +++++++++---------
 .../roman_pots/train_dense_neural_network.py  | 120 +++----
 2 files changed, 207 insertions(+), 225 deletions(-)

diff --git a/benchmarks/roman_pots/Snakefile b/benchmarks/roman_pots/Snakefile
index 3827c080..d0d66e4f 100644
--- a/benchmarks/roman_pots/Snakefile
+++ b/benchmarks/roman_pots/Snakefile
@@ -1,197 +1,195 @@
 from itertools import product
-import hashlib
 
 DETECTOR_PATH = os.environ["DETECTOR_PATH"]
 DETECTOR_VERSION = os.environ["DETECTOR_VERSION"]
 SUBSYSTEM = "roman_pots"
 BENCHMARK = "dense_neural_network"
 
-DETECTOR_CONFIG = ["epic_ip6"]
-NUM_EPOCHS_PZ = [100]
-LEARNING_RATE_PZ = [0.01]
-SIZE_INPUT_PZ = [4]
-SIZE_OUTPUT_PZ = [1]
-N_LAYERS_PZ = [3,6]
-SIZE_FIRST_HIDDEN_LAYER_PZ = [128]
-MULTIPLIER_PZ = [0.5]
-LEAK_RATE_PZ = [0.025]
-NUM_EPOCHS_PY = [100]
-LEARNING_RATE_PY = [0.01]
-SIZE_INPUT_PY = [3]
-SIZE_OUTPUT_PY = [1]
-N_LAYERS_PY = [3,6]
-SIZE_FIRST_HIDDEN_LAYER_PY = [128]
-MULTIPLIER_PY = [0.5]
-LEAK_RATE_PY = [0.025]
-NUM_EPOCHS_PX = [100]
-LEARNING_RATE_PX = [0.01]
-SIZE_INPUT_PX = [3]
-SIZE_OUTPUT_PX = [1]
-N_LAYERS_PX = [3,7]
-SIZE_FIRST_HIDDEN_LAYER_PX = [128]
-MULTIPLIER_PX = [0.5]
-LEAK_RATE_PX = [0.025]
-MAX_HASH = 6
-NFILES = range(1,11)
-NEVENTS_PER_FILE = [100]
-NUM_TRAINING_INPUTS = [int(0.5*max(NFILES)),int(0.7*max(NFILES))]
-MODEL_VERSION = [
-    hashlib.sha512("_".join(map(str,x)).encode()).hexdigest()[:MAX_HASH]
-    for x in product(
-        NEVENTS_PER_FILE, NUM_TRAINING_INPUTS,
-        NUM_EPOCHS_PZ, LEARNING_RATE_PZ, SIZE_INPUT_PZ, SIZE_OUTPUT_PZ, N_LAYERS_PZ, SIZE_FIRST_HIDDEN_LAYER_PZ, MULTIPLIER_PZ, LEAK_RATE_PZ,
-        NUM_EPOCHS_PY, LEARNING_RATE_PY, SIZE_INPUT_PY, SIZE_OUTPUT_PY, N_LAYERS_PY, SIZE_FIRST_HIDDEN_LAYER_PY, MULTIPLIER_PY, LEAK_RATE_PY,
-        NUM_EPOCHS_PX, LEARNING_RATE_PX, SIZE_INPUT_PX, SIZE_OUTPUT_PX, N_LAYERS_PX, SIZE_FIRST_HIDDEN_LAYER_PX, MULTIPLIER_PX, LEAK_RATE_PX
-    )
-]
+DETECTOR_CONFIG = "epic_ip6"
+NEVENTS_PER_FILE = 5
+NFILES = range(1,6)
+MODEL_PZ = {
+    'num_epochs' : [100],
+    'learning_rate' : [0.01],
+    'size_input' : [4],
+    'size_output' : [1],
+    'n_layers' : [3,6],
+    'size_first_hidden_layer' : [128],
+    'multiplier' : [0.5],
+    'leak_rate' : [0.025],
+}
+MODEL_PY = {
+    'num_epochs' : [100],
+    'learning_rate' : [0.01],
+    'size_input' : [3],
+    'size_output' : [1],
+    'n_layers' : [3,6],
+    'size_first_hidden_layer' : [128],
+    'multiplier' : [0.5],
+    'leak_rate' : [0.025],
+}
+MODEL_PX = {
+    'num_epochs' : [100],
+    'learning_rate' : [0.01],
+    'size_input' : [3],
+    'size_output' : [1],
+    'n_layers' : [3,7],
+    'size_first_hidden_layer' : [128],
+    'multiplier' : [0.5],
+    'leak_rate' : [0.025],
+}
 
-rule target_generate:
+rule all:
     input:
-        expand("results/"+DETECTOR_VERSION+"/{detector_config}/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/raw_data/"+DETECTOR_VERSION+"_{detector_config}_{index}.edm4hep.root",
-               detector_config=DETECTOR_CONFIG,
+        expand("results/"+DETECTOR_VERSION+"/"+DETECTOR_CONFIG+"/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/raw_data/"+DETECTOR_VERSION+"_"+DETECTOR_CONFIG+"_{index}.edm4hep.root",
                index=NFILES),
-        expand("results/"+DETECTOR_VERSION+"/{detector_config}/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/processed_data/"+DETECTOR_VERSION+"_{detector_config}_{index}.txt",
-               detector_config=DETECTOR_CONFIG,
+        expand("results/"+DETECTOR_VERSION+"/"+DETECTOR_CONFIG+"/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/processed_data/"+DETECTOR_VERSION+"_"+DETECTOR_CONFIG+"_{index}.txt",
                index=NFILES),
-        expand("results/"+str(DETECTOR_VERSION)+"/{detector_config}/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/metadata/"+str(DETECTOR_VERSION)+"_{detector_config}_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_{model_version}.txt",
-               detector_config=DETECTOR_CONFIG,
-               model_version=MODEL_VERSION)
+        expand("results/"+DETECTOR_VERSION+"/{detector_config}/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/artifacts/model_pz/num_epochs_{num_epochs}/learning_rate_{learning_rate}/size_input_{size_input}/size_output_{size_output}/n_layers_{n_layers}/size_first_hidden_layer_{size_first_hidden_layer}/multiplier_{multiplier}/leak_rate_{leak_rate}/model_pz.pt",
+               detector_config=DETECTOR_CONFIG,
+               num_epochs=MODEL_PZ["num_epochs"],
+               learning_rate=MODEL_PZ["learning_rate"],
+               size_input=MODEL_PZ["size_input"],
+               size_output=MODEL_PZ["size_output"],
+               n_layers=MODEL_PZ["n_layers"],
+               size_first_hidden_layer=MODEL_PZ["size_first_hidden_layer"],
+               multiplier=MODEL_PZ["multiplier"],
+               leak_rate=MODEL_PZ["leak_rate"]
+        ),
+        expand("results/"+DETECTOR_VERSION+"/{detector_config}/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/artifacts/model_pz/num_epochs_{num_epochs}/learning_rate_{learning_rate}/size_input_{size_input}/size_output_{size_output}/n_layers_{n_layers}/size_first_hidden_layer_{size_first_hidden_layer}/multiplier_{multiplier}/leak_rate_{leak_rate}/LossVsEpoch_model_pz.png",
+               detector_config=DETECTOR_CONFIG,
+               num_epochs=MODEL_PZ["num_epochs"],
+               learning_rate=MODEL_PZ["learning_rate"],
+               size_input=MODEL_PZ["size_input"],
+               size_output=MODEL_PZ["size_output"],
+               n_layers=MODEL_PZ["n_layers"],
+               size_first_hidden_layer=MODEL_PZ["size_first_hidden_layer"],
+               multiplier=MODEL_PZ["multiplier"],
+               leak_rate=MODEL_PZ["leak_rate"]
+        ),
+        expand("results/"+DETECTOR_VERSION+"/{detector_config}/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/artifacts/model_py/num_epochs_{num_epochs}/learning_rate_{learning_rate}/size_input_{size_input}/size_output_{size_output}/n_layers_{n_layers}/size_first_hidden_layer_{size_first_hidden_layer}/multiplier_{multiplier}/leak_rate_{leak_rate}/model_py.pt",
+               detector_config=DETECTOR_CONFIG,
+               num_epochs=MODEL_PY["num_epochs"],
+               learning_rate=MODEL_PY["learning_rate"],
+               size_input=MODEL_PY["size_input"],
+               size_output=MODEL_PY["size_output"],
+               n_layers=MODEL_PY["n_layers"],
+               size_first_hidden_layer=MODEL_PY["size_first_hidden_layer"],
+               multiplier=MODEL_PY["multiplier"],
+               leak_rate=MODEL_PY["leak_rate"]
+        ),
+        expand("results/"+DETECTOR_VERSION+"/{detector_config}/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/artifacts/model_py/num_epochs_{num_epochs}/learning_rate_{learning_rate}/size_input_{size_input}/size_output_{size_output}/n_layers_{n_layers}/size_first_hidden_layer_{size_first_hidden_layer}/multiplier_{multiplier}/leak_rate_{leak_rate}/LossVsEpoch_model_py.png",
+               detector_config=DETECTOR_CONFIG,
+               num_epochs=MODEL_PY["num_epochs"],
+               learning_rate=MODEL_PY["learning_rate"],
+               size_input=MODEL_PY["size_input"],
+               size_output=MODEL_PY["size_output"],
+               n_layers=MODEL_PY["n_layers"],
+               size_first_hidden_layer=MODEL_PY["size_first_hidden_layer"],
+               multiplier=MODEL_PY["multiplier"],
+               leak_rate=MODEL_PY["leak_rate"]
+        ),
+        expand("results/"+DETECTOR_VERSION+"/{detector_config}/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/artifacts/model_px/num_epochs_{num_epochs}/learning_rate_{learning_rate}/size_input_{size_input}/size_output_{size_output}/n_layers_{n_layers}/size_first_hidden_layer_{size_first_hidden_layer}/multiplier_{multiplier}/leak_rate_{leak_rate}/model_px.pt",
+               detector_config=DETECTOR_CONFIG,
+               num_epochs=MODEL_PX["num_epochs"],
+               learning_rate=MODEL_PX["learning_rate"],
+               size_input=MODEL_PX["size_input"],
+               size_output=MODEL_PX["size_output"],
+               n_layers=MODEL_PX["n_layers"],
+               size_first_hidden_layer=MODEL_PX["size_first_hidden_layer"],
+               multiplier=MODEL_PX["multiplier"],
+               leak_rate=MODEL_PX["leak_rate"]
+        ),
+        expand("results/"+DETECTOR_VERSION+"/{detector_config}/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/artifacts/model_px/num_epochs_{num_epochs}/learning_rate_{learning_rate}/size_input_{size_input}/size_output_{size_output}/n_layers_{n_layers}/size_first_hidden_layer_{size_first_hidden_layer}/multiplier_{multiplier}/leak_rate_{leak_rate}/LossVsEpoch_model_px.png",
+               detector_config=DETECTOR_CONFIG,
+               num_epochs=MODEL_PX["num_epochs"],
+               learning_rate=MODEL_PX["learning_rate"],
+               size_input=MODEL_PX["size_input"],
+               size_output=MODEL_PX["size_output"],
+               n_layers=MODEL_PX["n_layers"],
+               size_first_hidden_layer=MODEL_PX["size_first_hidden_layer"],
+               multiplier=MODEL_PX["multiplier"],
+               leak_rate=MODEL_PX["leak_rate"]
+        )
+
+
-rule target_train:
-    input:
-        expand("results/"+str(DETECTOR_VERSION)+"/{detector_config}/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/trained_models/model_pz_"+str(DETECTOR_VERSION)+"_{detector_config}_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_{model_version}.pt",
-               detector_config=DETECTOR_CONFIG,
-               model_version=MODEL_VERSION),
-        expand("results/"+str(DETECTOR_VERSION)+"/{detector_config}/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/trained_models/model_py_"+str(DETECTOR_VERSION)+"_{detector_config}_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_{model_version}.pt",
-               detector_config=DETECTOR_CONFIG,
-               model_version=MODEL_VERSION),
-        expand("results/"+str(DETECTOR_VERSION)+"/{detector_config}/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/trained_models/model_px_"+str(DETECTOR_VERSION)+"_{detector_config}_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_{model_version}.pt",
-               detector_config=DETECTOR_CONFIG,
-               model_version=MODEL_VERSION),
-        expand("results/"+str(DETECTOR_VERSION)+"/{detector_config}/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/artifacts/LossVsEpoch_model_pz_"+str(DETECTOR_VERSION)+"_{detector_config}_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_{model_version}.png",
-               detector_config=DETECTOR_CONFIG,
-               model_version=MODEL_VERSION),
-        expand("results/"+str(DETECTOR_VERSION)+"/{detector_config}/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/artifacts/LossVsEpoch_model_py_"+str(DETECTOR_VERSION)+"_{detector_config}_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_{model_version}.png",
-               detector_config=DETECTOR_CONFIG,
-               model_version=MODEL_VERSION),
-        expand("results/"+str(DETECTOR_VERSION)+"/{detector_config}/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/artifacts/LossVsEpoch_model_px_"+str(DETECTOR_VERSION)+"_{detector_config}_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_{model_version}.png",
-               detector_config=DETECTOR_CONFIG,
-               model_version=MODEL_VERSION)
 
 rule roman_pots_generate_events:
     input:
         script="steering_file.py"
     params:
         detector_path=DETECTOR_PATH,
-        nevents_per_file=NEVENTS_PER_FILE
+        nevents_per_file=NEVENTS_PER_FILE,
+        detector_config=DETECTOR_CONFIG
     output:
-        "results/"+DETECTOR_VERSION+"/{detector_config}/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/raw_data/"+DETECTOR_VERSION+"_{detector_config}_{index}.edm4hep.root"
+        "results/"+DETECTOR_VERSION+"/"+DETECTOR_CONFIG+"/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/raw_data/"+DETECTOR_VERSION+"_"+DETECTOR_CONFIG+"_{index}.edm4hep.root"
     shell:
         """
         npsim --steeringFile {input.script} \
-              --compactFile {params.detector_path}/{wildcards.detector_config}.xml \
+              --compactFile {params.detector_path}/{params.detector_config}.xml \
               --outputFile {output} \
               -N {params.nevents_per_file}
         """
 
 rule roman_pots_preprocess_model_training_data:
     input:
-        data = "results/"+DETECTOR_VERSION+"/{detector_config}/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/raw_data/"+DETECTOR_VERSION+"_{detector_config}_{index}.edm4hep.root",
+        data = "results/"+DETECTOR_VERSION+"/"+DETECTOR_CONFIG+"/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/raw_data/"+DETECTOR_VERSION+"_"+DETECTOR_CONFIG+"_{index}.edm4hep.root",
         script = "preprocess_model_training_data.cxx"
     output:
-        "results/"+DETECTOR_VERSION+"/{detector_config}/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/processed_data/"+DETECTOR_VERSION+"_{detector_config}_{index}.txt"
+        "results/"+DETECTOR_VERSION+"/"+DETECTOR_CONFIG+"/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/processed_data/"+DETECTOR_VERSION+"_"+DETECTOR_CONFIG+"_{index}.txt"
     shell:
        """
        root -q -b {input.script}\"(\\\"{input.data}\\\",\\\"{output}\\\")\"
        """
 
-rule roman_pots_generate_neural_network_configs:
-    input:
-    output:
-        expand("results/"+str(DETECTOR_VERSION)+"/{detector_config}/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/metadata/"+str(DETECTOR_VERSION)+"_{detector_config}_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_{model_version}.txt",
-               detector_config=DETECTOR_CONFIG,
-               model_version=MODEL_VERSION)
-    run:
-        for detector_config, nevents_per_file, num_training_inputs, \
-            num_epochs_pz, learning_rate_pz, size_input_pz, size_output_pz, n_layers_pz, size_first_hidden_layer_pz, multiplier_pz, leak_rate_pz, \
-            num_epochs_py, learning_rate_py, size_input_py, size_output_py, n_layers_py, size_first_hidden_layer_py, multiplier_py, leak_rate_py, \
-            num_epochs_px, learning_rate_px, size_input_px, size_output_px, n_layers_px, size_first_hidden_layer_px, multiplier_px, leak_rate_px in \
-            product(DETECTOR_CONFIG, NEVENTS_PER_FILE, NUM_TRAINING_INPUTS,
-                    NUM_EPOCHS_PZ, LEARNING_RATE_PZ, SIZE_INPUT_PZ, SIZE_OUTPUT_PZ, N_LAYERS_PZ, SIZE_FIRST_HIDDEN_LAYER_PZ, MULTIPLIER_PZ, LEAK_RATE_PZ,
-                    NUM_EPOCHS_PY, LEARNING_RATE_PY, SIZE_INPUT_PY, SIZE_OUTPUT_PY, N_LAYERS_PY, SIZE_FIRST_HIDDEN_LAYER_PY, MULTIPLIER_PY, LEAK_RATE_PY,
-                    NUM_EPOCHS_PX, LEARNING_RATE_PX, SIZE_INPUT_PX, SIZE_OUTPUT_PX, N_LAYERS_PX, SIZE_FIRST_HIDDEN_LAYER_PX, MULTIPLIER_PX, LEAK_RATE_PX):
-            output_dir = "results/"+str(DETECTOR_VERSION)+"/"+str(detector_config)+"/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/metadata"
-            output_file = str(nevents_per_file)+"_"+str(num_training_inputs)+"_"+\
-                          str(num_epochs_pz)+"_"+str(learning_rate_pz)+"_"+str(size_input_pz)+"_"+str(size_output_pz)+"_"+str(n_layers_pz)+"_"+str(size_first_hidden_layer_pz)+"_"+str(multiplier_pz)+"_"+str(leak_rate_pz)+"_"+\
-                          str(num_epochs_py)+"_"+str(learning_rate_py)+"_"+str(size_input_py)+"_"+str(size_output_py)+"_"+str(n_layers_py)+"_"+str(size_first_hidden_layer_py)+"_"+str(multiplier_py)+"_"+str(leak_rate_py)+"_"+\
-                          str(num_epochs_px)+"_"+str(learning_rate_px)+"_"+str(size_input_px)+"_"+str(size_output_px)+"_"+str(n_layers_px)+"_"+str(size_first_hidden_layer_px)+"_"+str(multiplier_px)+"_"+str(leak_rate_px)
-            model_hash = hashlib.sha512(output_file.encode()).hexdigest()[:MAX_HASH]
-            output_file_location = open(str(output_dir)+"/"+str(DETECTOR_VERSION)+"_"+str(detector_config)+"_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_"+str(model_hash)+".txt","w")
-            output_file_content = "--input_files\nresults/"+str(DETECTOR_VERSION)+"/"+str(detector_config)+"/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/processed_data/"+str(DETECTOR_VERSION)+"_"+str(detector_config)+"_\n"+\
-                                  "--model_version\n"+str(DETECTOR_VERSION)+"_"+str(detector_config)+"_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_"+str(model_hash)+"\n"+\
-                                  "--nevents_per_file\n"+str(nevents_per_file)+"\n"+\
-                                  "--num_training_inputs\n"+str(num_training_inputs)+"\n"+\
-                                  "--num_epochs_pz\n"+str(num_epochs_pz)+"\n"+\
-                                  "--learning_rate_pz\n"+str(learning_rate_pz)+"\n"+\
-                                  "--size_input_pz\n"+str(size_input_pz)+"\n"+\
-                                  "--size_output_pz\n"+str(size_output_pz)+"\n"+\
-                                  "--n_layers_pz\n"+str(n_layers_pz)+"\n"+\
-                                  "--size_first_hidden_layer_pz\n"+str(size_first_hidden_layer_pz)+"\n"+\
-                                  "--multiplier_pz\n"+str(multiplier_pz)+"\n"+\
-                                  "--leak_rate_pz\n"+str(leak_rate_pz)+"\n"+\
-                                  "--num_epochs_py\n"+str(num_epochs_py)+"\n"+\
-                                  "--learning_rate_py\n"+str(learning_rate_py)+"\n"+\
-                                  "--size_input_py\n"+str(size_input_py)+"\n"+\
-                                  "--size_output_py\n"+str(size_output_py)+"\n"+\
-                                  "--n_layers_py\n"+str(n_layers_py)+"\n"+\
-                                  "--size_first_hidden_layer_py\n"+str(size_first_hidden_layer_py)+"\n"+\
-                                  "--multiplier_py\n"+str(multiplier_py)+"\n"+\
-                                  "--leak_rate_py\n"+str(leak_rate_py)+"\n"+\
-                                  "--num_epochs_px\n"+str(num_epochs_px)+"\n"+\
-                                  "--learning_rate_px\n"+str(learning_rate_px)+"\n"+\
-                                  "--size_input_px\n"+str(size_input_px)+"\n"+\
-                                  "--size_output_px\n"+str(size_output_px)+"\n"+\
-                                  "--n_layers_px\n"+str(n_layers_px)+"\n"+\
-                                  "--size_first_hidden_layer_px\n"+str(size_first_hidden_layer_px)+"\n"+\
-                                  "--multiplier_px\n"+str(multiplier_px)+"\n"+\
-                                  "--leak_rate_px\n"+str(leak_rate_px)
-            output_file_location.write(output_file_content)
-            print(output_file_location)
-            output_file_location.close()
 
-rule roman_pots_train_neural_networks:
+rule roman_pots_train_model_pz:
     input:
+        data = ["results/"+DETECTOR_VERSION+"/"+DETECTOR_CONFIG+"/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/processed_data/"+DETECTOR_VERSION+"_"+DETECTOR_CONFIG+"_{index}.txt".format(index=index) for index in NFILES],
         script = "train_dense_neural_network.py"
+    params:
+        detector_version=DETECTOR_VERSION,
+        detector_config=DETECTOR_CONFIG,
+        subsystem=SUBSYSTEM,
+        benchmark=BENCHMARK
     output:
-        expand("results/"+str(DETECTOR_VERSION)+"/{detector_config}/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/trained_models/model_pz_"+str(DETECTOR_VERSION)+"_{detector_config}_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_{model_version}.pt",
-               detector_config=DETECTOR_CONFIG,
-               model_version=MODEL_VERSION),
-        expand("results/"+str(DETECTOR_VERSION)+"/{detector_config}/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/trained_models/model_py_"+str(DETECTOR_VERSION)+"_{detector_config}_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_{model_version}.pt",
-               detector_config=DETECTOR_CONFIG,
-               model_version=MODEL_VERSION),
-        expand("results/"+str(DETECTOR_VERSION)+"/{detector_config}/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/trained_models/model_px_"+str(DETECTOR_VERSION)+"_{detector_config}_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_{model_version}.pt",
-               detector_config=DETECTOR_CONFIG,
-               model_version=MODEL_VERSION),
-        expand("results/"+str(DETECTOR_VERSION)+"/{detector_config}/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/artifacts/LossVsEpoch_model_pz_"+str(DETECTOR_VERSION)+"_{detector_config}_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_{model_version}.png",
-               detector_config=DETECTOR_CONFIG,
-               model_version=MODEL_VERSION),
-        expand("results/"+str(DETECTOR_VERSION)+"/{detector_config}/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/artifacts/LossVsEpoch_model_py_"+str(DETECTOR_VERSION)+"_{detector_config}_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_{model_version}.png",
-               detector_config=DETECTOR_CONFIG,
-               model_version=MODEL_VERSION),
-        expand("results/"+str(DETECTOR_VERSION)+"/{detector_config}/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/artifacts/LossVsEpoch_model_px_"+str(DETECTOR_VERSION)+"_{detector_config}_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_{model_version}.png",
-               detector_config=DETECTOR_CONFIG,
-               model_version=MODEL_VERSION)
-
-    run:
-        for detector_config, model_version in product(DETECTOR_CONFIG,MODEL_VERSION):
-            os.system("python "+str(input.script)+" results/"+str(DETECTOR_VERSION)+"/"+str(detector_config)+"/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/metadata/"+str(DETECTOR_VERSION)+"_"+str(detector_config)+"_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_"+str(model_version)+".txt")
-            os.system("mv model_pz_"+str(DETECTOR_VERSION)+"_"+str(detector_config)+"_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_"+str(model_version)+".pt results/"+str(DETECTOR_VERSION)+"/"+str(detector_config)+"/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/trained_models/")
-            os.system("mv model_py_"+str(DETECTOR_VERSION)+"_"+str(detector_config)+"_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_"+str(model_version)+".pt results/"+str(DETECTOR_VERSION)+"/"+str(detector_config)+"/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/trained_models/")
-            os.system("mv model_px_"+str(DETECTOR_VERSION)+"_"+str(detector_config)+"_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_"+str(model_version)+".pt results/"+str(DETECTOR_VERSION)+"/"+str(detector_config)+"/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/trained_models/")
-            os.system("mv LossVsEpoch_model_pz_"+str(DETECTOR_VERSION)+"_"+str(detector_config)+"_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_"+str(model_version)+".png results/"+str(DETECTOR_VERSION)+"/"+str(detector_config)+"/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/artifacts/")
-            os.system("mv LossVsEpoch_model_py_"+str(DETECTOR_VERSION)+"_"+str(detector_config)+"_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_"+str(model_version)+".png results/"+str(DETECTOR_VERSION)+"/"+str(detector_config)+"/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/artifacts/")
-            os.system("mv LossVsEpoch_model_px_"+str(DETECTOR_VERSION)+"_"+str(detector_config)+"_"+str(SUBSYSTEM)+"_"+str(BENCHMARK)+"_"+str(model_version)+".png results/"+str(DETECTOR_VERSION)+"/"+str(detector_config)+"/detector_benchmarks/"+str(SUBSYSTEM)+"/"+str(BENCHMARK)+"/artifacts/")
-
-
+        "results/"+DETECTOR_VERSION+"/"+DETECTOR_CONFIG+"/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/artifacts/model_pz/num_epochs_{num_epochs}/learning_rate_{learning_rate}/size_input_{size_input}/size_output_{size_output}/n_layers_{n_layers}/size_first_hidden_layer_{size_first_hidden_layer}/multiplier_{multiplier}/leak_rate_{leak_rate}/model_pz.pt",
+        "results/"+DETECTOR_VERSION+"/"+DETECTOR_CONFIG+"/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/artifacts/model_pz/num_epochs_{num_epochs}/learning_rate_{learning_rate}/size_input_{size_input}/size_output_{size_output}/n_layers_{n_layers}/size_first_hidden_layer_{size_first_hidden_layer}/multiplier_{multiplier}/leak_rate_{leak_rate}/LossVsEpoch_model_pz.png"
+    shell:
+        """
+        python {input.script} --input_files {input.data} --model_name model_pz --model_dir results/{params.detector_version}/{params.detector_config}/detector_benchmarks/{params.subsystem}/{params.benchmark}/artifacts/model_pz/num_epochs_{wildcards.num_epochs}/learning_rate_{wildcards.learning_rate}/size_input_{wildcards.size_input}/size_output_{wildcards.size_output}/n_layers_{wildcards.n_layers}/size_first_hidden_layer_{wildcards.size_first_hidden_layer}/multiplier_{wildcards.multiplier}/leak_rate_{wildcards.leak_rate} --num_epochs {wildcards.num_epochs} --learning_rate {wildcards.learning_rate} --size_input {wildcards.size_input} --size_output {wildcards.size_output} --n_layers {wildcards.n_layers} --size_first_hidden_layer {wildcards.size_first_hidden_layer} --multiplier {wildcards.multiplier} --leak_rate {wildcards.leak_rate}
+        """
+
+rule roman_pots_train_model_py:
+    input:
+        data = ["results/"+DETECTOR_VERSION+"/"+DETECTOR_CONFIG+"/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/processed_data/"+DETECTOR_VERSION+"_"+DETECTOR_CONFIG+"_{index}.txt".format(index=index) for index in NFILES],
+        script = "train_dense_neural_network.py"
+    params:
+        detector_version=DETECTOR_VERSION,
+        detector_config=DETECTOR_CONFIG,
+        subsystem=SUBSYSTEM,
+        benchmark=BENCHMARK
+    output:
+        "results/"+DETECTOR_VERSION+"/"+DETECTOR_CONFIG+"/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/artifacts/model_py/num_epochs_{num_epochs}/learning_rate_{learning_rate}/size_input_{size_input}/size_output_{size_output}/n_layers_{n_layers}/size_first_hidden_layer_{size_first_hidden_layer}/multiplier_{multiplier}/leak_rate_{leak_rate}/model_py.pt",
+        "results/"+DETECTOR_VERSION+"/"+DETECTOR_CONFIG+"/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/artifacts/model_py/num_epochs_{num_epochs}/learning_rate_{learning_rate}/size_input_{size_input}/size_output_{size_output}/n_layers_{n_layers}/size_first_hidden_layer_{size_first_hidden_layer}/multiplier_{multiplier}/leak_rate_{leak_rate}/LossVsEpoch_model_py.png"
+    shell:
+        """
+        python {input.script} --input_files {input.data} --model_name model_py --model_dir results/{params.detector_version}/{params.detector_config}/detector_benchmarks/{params.subsystem}/{params.benchmark}/artifacts/model_py/num_epochs_{wildcards.num_epochs}/learning_rate_{wildcards.learning_rate}/size_input_{wildcards.size_input}/size_output_{wildcards.size_output}/n_layers_{wildcards.n_layers}/size_first_hidden_layer_{wildcards.size_first_hidden_layer}/multiplier_{wildcards.multiplier}/leak_rate_{wildcards.leak_rate} --num_epochs {wildcards.num_epochs} --learning_rate {wildcards.learning_rate} --size_input {wildcards.size_input} --size_output {wildcards.size_output} --n_layers {wildcards.n_layers} --size_first_hidden_layer {wildcards.size_first_hidden_layer} --multiplier {wildcards.multiplier} --leak_rate {wildcards.leak_rate}
+        """
+
+rule roman_pots_train_model_px:
+    input:
+        data = ["results/"+DETECTOR_VERSION+"/"+DETECTOR_CONFIG+"/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/processed_data/"+DETECTOR_VERSION+"_"+DETECTOR_CONFIG+"_{index}.txt".format(index=index) for index in NFILES],
+        script = "train_dense_neural_network.py"
+    params:
+        detector_version=DETECTOR_VERSION,
+        detector_config=DETECTOR_CONFIG,
+        subsystem=SUBSYSTEM,
+        benchmark=BENCHMARK
+    output:
+        "results/"+DETECTOR_VERSION+"/"+DETECTOR_CONFIG+"/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/artifacts/model_px/num_epochs_{num_epochs}/learning_rate_{learning_rate}/size_input_{size_input}/size_output_{size_output}/n_layers_{n_layers}/size_first_hidden_layer_{size_first_hidden_layer}/multiplier_{multiplier}/leak_rate_{leak_rate}/model_px.pt",
+        "results/"+DETECTOR_VERSION+"/"+DETECTOR_CONFIG+"/detector_benchmarks/"+SUBSYSTEM+"/"+BENCHMARK+"/artifacts/model_px/num_epochs_{num_epochs}/learning_rate_{learning_rate}/size_input_{size_input}/size_output_{size_output}/n_layers_{n_layers}/size_first_hidden_layer_{size_first_hidden_layer}/multiplier_{multiplier}/leak_rate_{leak_rate}/LossVsEpoch_model_px.png"
    shell:
        """
        python {input.script} --input_files {input.data} --model_name model_px --model_dir results/{params.detector_version}/{params.detector_config}/detector_benchmarks/{params.subsystem}/{params.benchmark}/artifacts/model_px/num_epochs_{wildcards.num_epochs}/learning_rate_{wildcards.learning_rate}/size_input_{wildcards.size_input}/size_output_{wildcards.size_output}/n_layers_{wildcards.n_layers}/size_first_hidden_layer_{wildcards.size_first_hidden_layer}/multiplier_{wildcards.multiplier}/leak_rate_{wildcards.leak_rate} --num_epochs {wildcards.num_epochs} --learning_rate {wildcards.learning_rate} --size_input {wildcards.size_input} --size_output {wildcards.size_output} --n_layers {wildcards.n_layers} --size_first_hidden_layer {wildcards.size_first_hidden_layer} --multiplier {wildcards.multiplier} --leak_rate {wildcards.leak_rate}
        """
diff --git a/benchmarks/roman_pots/train_dense_neural_network.py b/benchmarks/roman_pots/train_dense_neural_network.py
index 5f9dec91..36bd9bd6 100644
--- a/benchmarks/roman_pots/train_dense_neural_network.py
+++ b/benchmarks/roman_pots/train_dense_neural_network.py
@@ -8,6 +8,8 @@ import matplotlib.pyplot as plt
 import argparse
 import sys
+import hashlib
+
 torch.set_default_dtype(torch.float32)
 
 if torch.cuda.is_available():
@@ -47,30 +49,13 @@ def standardize(x):
     standardized_tensor = (x - mean) / std
     return standardized_tensor, mean, std
 
-def train_model(name, input_tensor, target_tensor, model, hyperparameters):
-    # Set hyperparameters
-    match name:
-        case "model_pz":
-            num_epochs = int(hyperparameters.num_epochs_pz)
-            learning_rate = float(hyperparameters.learning_rate_pz)
-        case "model_py":
-            num_epochs = int(hyperparameters.num_epochs_py)
-            learning_rate = float(hyperparameters.learning_rate_py)
-        case "model_px":
-            num_epochs = int(hyperparameters.num_epochs_px)
-            learning_rate = float(hyperparameters.learning_rate_px)
-        case _:
-            print("No model name provided. Return without further processing")
-            return
-    print("Set number of epochs and learning rate to "+str(num_epochs)+" and "+str(learning_rate)+" for "+str(name)+" training.")
-
-
+def train_model(input_tensor, target_tensor, model, hyperparameters):
     # Send model to device
     model=model.to(device)
 
     # Define the loss function and optimizer
     criterion = torch.nn.HuberLoss(reduction='mean', delta=1.0)
-    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
+    optimizer = torch.optim.Adam(model.parameters(), lr=hyperparameters.learning_rate)
 
     # Create a learning rate scheduler
     scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,'min',patience=100,cooldown=100,factor=0.5,threshold=1e-4,verbose=True)
@@ -79,7 +64,7 @@ def train_model(name, input_tensor, target_tensor, model, hyperparameters):
     losses = []
 
     # Train the model
-    for epoch in range(num_epochs):
+    for epoch in range(hyperparameters.num_epochs):
         # Forward pass
         inputs, targets = input_tensor.to(device), target_tensor.to(device)
         predictions = model(inputs)
@@ -98,18 +83,18 @@ def train_model(name, input_tensor, target_tensor, model, hyperparameters):
 
         # Print progress
         if (epoch + 1) % 10 == 0:
-            print("Epoch "+str(epoch+1)+"/"+str(num_epochs)+", Loss: "+"{0:0.10f}".format(loss.item()))
+            print("Epoch "+str(epoch+1)+"/"+str(hyperparameters.num_epochs)+", Loss: "+"{0:0.10f}".format(loss.item()))
 
     # Plot the loss values
     plt.figure()
-    plt.plot(range(1, num_epochs+1), losses)
+    plt.plot(range(1, hyperparameters.num_epochs+1), losses)
     plt.xlabel('Epoch')
     plt.ylabel('Loss')
     plt.title('Loss as a Function of Epoch')
     plt.yscale('log')
-    plt.savefig("LossVsEpoch_"+name+"_"+str(hyperparameters.model_version)+".png")
+    plt.savefig(hyperparameters.model_dir+"/LossVsEpoch_"+hyperparameters.model_name+".png")
 
-    torch.jit.script(model).save(name+"_"+str(hyperparameters.model_version)+".pt")
+    torch.jit.script(model).save(hyperparameters.model_dir+"/"+hyperparameters.model_name+".pt")
     return
 
 def run_experiment(hyperparameters):
@@ -117,63 +102,62 @@ def run_experiment(hyperparameters):
 
     # Load training data in tensors
     training_data = pd.DataFrame()
-    for i in range(1,int(hyperparameters.num_training_inputs)+1):
-        temp_training_data = pd.read_csv(hyperparameters.input_files+str(i)+'.txt', delimiter='\t', header=None)
+    for i in hyperparameters.input_files:
+        temp_training_data = pd.read_csv(i, delimiter='\t', header=None)
         training_data = pd.concat([training_data, temp_training_data], ignore_index=True)
 
     training_RP_pos_tensor = torch.tensor(training_data.iloc[:,3:7].values, dtype=torch.float32)
     training_MC_mom_tensor = torch.tensor(training_data.iloc[:,0:3].values, dtype=torch.float32)
 
     # Standardize training data
-    source_pz = training_RP_pos_tensor
-    scaled_source_pz, mean_source_pz, std_source_pz = standardize(source_pz)
-    target_pz = training_MC_mom_tensor[:,2].unsqueeze(1)
-
-    source_py = torch.cat((training_RP_pos_tensor[:,2:4], training_MC_mom_tensor[:,2].unsqueeze(1)), 1)
-    scaled_source_py, mean_source_py, std_source_py = standardize(source_py)
-    target_py = training_MC_mom_tensor[:,1].unsqueeze(1)
+    match hyperparameters.model_name:
+        case "model_pz":
+            source = training_RP_pos_tensor
+            scaled_source, mean_source, std_source = standardize(source)
+            target = training_MC_mom_tensor[:,2].unsqueeze(1)
+
+        case "model_py":
+            source = torch.cat((training_RP_pos_tensor[:,2:4], training_MC_mom_tensor[:,2].unsqueeze(1)), 1)
+            scaled_source, mean_source, std_source = standardize(source)
+            target = training_MC_mom_tensor[:,1].unsqueeze(1)
+
+        case "model_px":
+            source = torch.cat((training_RP_pos_tensor[:,0:2], training_MC_mom_tensor[:,2].unsqueeze(1)), 1)
+            scaled_source, mean_source, std_source = standardize(source)
+            target = training_MC_mom_tensor[:,0].unsqueeze(1)
-    source_px = torch.cat((training_RP_pos_tensor[:,0:2], training_MC_mom_tensor[:,2].unsqueeze(1)), 1)
-    scaled_source_px, mean_source_px, std_source_px = standardize(source_px)
-    target_px = training_MC_mom_tensor[:,0].unsqueeze(1)
+        case _:
+            print("Unrecognized model name. Stopping further processing.")
+            return
 
     # Initialize models
-    initial_model_pz = NeuralNet(size_input=int(hyperparameters.size_input_pz),
-                                 size_output=int(hyperparameters.size_output_pz),
-                                 n_layers=int(hyperparameters.n_layers_pz),
-                                 size_first_hidden_layer=int(hyperparameters.size_first_hidden_layer_pz),
-                                 multiplier=float(hyperparameters.multiplier_pz),
-                                 leak_rate=float(hyperparameters.leak_rate_pz))
-    initial_model_py = NeuralNet(size_input=int(hyperparameters.size_input_py),
-                                 size_output=int(hyperparameters.size_output_py),
-                                 n_layers=int(hyperparameters.n_layers_py),
-                                 size_first_hidden_layer=int(hyperparameters.size_first_hidden_layer_py),
-                                 multiplier=float(hyperparameters.multiplier_py),
-                                 leak_rate=float(hyperparameters.leak_rate_py))
-    initial_model_px = NeuralNet(size_input=int(hyperparameters.size_input_px),
-                                 size_output=int(hyperparameters.size_output_px),
-                                 n_layers=int(hyperparameters.n_layers_px),
-                                 size_first_hidden_layer=int(hyperparameters.size_first_hidden_layer_px),
-                                 multiplier=float(hyperparameters.multiplier_px),
-                                 leak_rate=float(hyperparameters.leak_rate_px))
-
+    initial_model = NeuralNet(size_input=int(hyperparameters.size_input),
+                              size_output=int(hyperparameters.size_output),
+                              n_layers=int(hyperparameters.n_layers),
+                              size_first_hidden_layer=int(hyperparameters.size_first_hidden_layer),
+                              multiplier=float(hyperparameters.multiplier),
+                              leak_rate=float(hyperparameters.leak_rate))
+
     # Train models
-    train_model("model_pz", scaled_source_pz, target_pz, initial_model_pz, hyperparameters)
-    train_model("model_py", scaled_source_py, target_py, initial_model_py, hyperparameters)
-    train_model("model_px", scaled_source_px, target_px, initial_model_px, hyperparameters)
-
+    train_model(scaled_source, target, initial_model, hyperparameters)
+
     # Print end statement
-    print("Training completed using "+str(int(hyperparameters.nevents_per_file)*int(hyperparameters.num_training_inputs))+" generated events.")
+    print("Training completed using "+str(len(hyperparameters.input_files))+" files with "+str(training_RP_pos_tensor.shape[0])+" eligible events.")
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(fromfile_prefix_chars='@')
-    hyperparameters_list = ['--input_files', '--model_version', '--nevents_per_file', '--num_training_inputs',
-                            '--num_epochs_pz', '--learning_rate_pz', '--size_input_pz', '--size_output_pz', '--n_layers_pz', '--size_first_hidden_layer_pz', '--multiplier_pz', '--leak_rate_pz',
-                            '--num_epochs_py', '--learning_rate_py', '--size_input_py', '--size_output_py', '--n_layers_py', '--size_first_hidden_layer_py', '--multiplier_py', '--leak_rate_py',
-                            '--num_epochs_px', '--learning_rate_px', '--size_input_px', '--size_output_px', '--n_layers_px', '--size_first_hidden_layer_px', '--multiplier_px', '--leak_rate_px']
-    for hyperparameter in hyperparameters_list:
-        parser.add_argument(hyperparameter)
-    hyperparameters = parser.parse_args(['@'+str(sys.argv[1])])
+    parser = argparse.ArgumentParser(description="Train neural network model for roman pots")
+    parser.add_argument('--input_files', type=str, nargs='+', required=True, help='Specify locations of input files.')
+    parser.add_argument('--model_name', type=str, required=True, help='Specify model name.')
+    parser.add_argument('--model_dir', type=str, required=True, help='Specify location to save model.')
+    parser.add_argument('--num_epochs', type=int, required=True, help='Specify number of epochs.')
+    parser.add_argument('--learning_rate', type=float, required=True, help='Specify learning rate.')
+    parser.add_argument('--size_input', type=int, required=True, help='Specify input size.')
+    parser.add_argument('--size_output', type=int, required=True, help='Specify output size.')
+    parser.add_argument('--n_layers', type=int, required=True, help='Specify number of layers.')
+    parser.add_argument('--size_first_hidden_layer', type=int, required=True, help='Specify size of first hidden layer.')
+    parser.add_argument('--multiplier', type=float, required=True, help='Specify multiplier to calculate size of subsequent hidden layers.')
+    parser.add_argument('--leak_rate', type=float, required=True, help='Specify leak rate.')
+    hyperparameters = parser.parse_args()
 
     run_experiment(hyperparameters)
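
For illustration only (not part of the patch): the directory layout generated by the expand() calls above can be sketched in a few lines of plain Python. The dictionary mirrors MODEL_PZ from the Snakefile; the helper name model_dirs is hypothetical. Each hyperparameter combination maps to its own human-readable nested path, which is what gives every trained model a unique location and lets Snakemake schedule all combinations in parallel.

    from itertools import product

    # Mirrors MODEL_PZ in the Snakefile; model_dirs is an illustrative
    # helper, not part of the patch.
    MODEL_PZ = {
        'num_epochs' : [100],
        'learning_rate' : [0.01],
        'size_input' : [4],
        'size_output' : [1],
        'n_layers' : [3,6],
        'size_first_hidden_layer' : [128],
        'multiplier' : [0.5],
        'leak_rate' : [0.025],
    }

    def model_dirs(grid):
        """Yield one nested directory path per hyperparameter combination."""
        keys = list(grid)
        for values in product(*(grid[key] for key in keys)):
            yield "/".join(f"{key}_{value}" for key, value in zip(keys, values))

    for path in model_dirs(MODEL_PZ):
        print(path)
    # num_epochs_100/learning_rate_0.01/size_input_4/size_output_1/n_layers_3/size_first_hidden_layer_128/multiplier_0.5/leak_rate_0.025
    # num_epochs_100/learning_rate_0.01/size_input_4/size_output_1/n_layers_6/size_first_hidden_layer_128/multiplier_0.5/leak_rate_0.025

Compared with the previous truncated SHA-512 tag, the path itself documents the configuration, so no separate metadata file is needed to recover the hyperparameters of a saved model.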
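Because every output path now encodes its full configuration, a single model can also be rebuilt by naming its path as the Snakemake target, while the default target (rule all) builds the whole grid. An illustrative invocation, with hyperparameter values taken from MODEL_PZ above and <detector_version> as a placeholder:

    snakemake --cores 1 results/<detector_version>/epic_ip6/detector_benchmarks/roman_pots/dense_neural_network/artifacts/model_pz/num_epochs_100/learning_rate_0.01/size_input_4/size_output_1/n_layers_3/size_first_hidden_layer_128/multiplier_0.5/leak_rate_0.025/model_pz.pt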