From c221e3b3de250c12637fc4051a1d1ca69a86164d Mon Sep 17 00:00:00 2001 From: Dmitry Kalinkin Date: Wed, 14 Aug 2024 19:05:21 -0400 Subject: [PATCH] Add backwards_ecal benchmark --- Snakefile | 1 + benchmarks/backwards_ecal/Snakefile | 90 +++++++++ benchmarks/backwards_ecal/backwards_ecal.org | 200 +++++++++++++++++++ benchmarks/backwards_ecal/config.yml | 28 +++ benchmarks/backwards_ecal/requirements.txt | 4 + 5 files changed, 323 insertions(+) create mode 100644 benchmarks/backwards_ecal/Snakefile create mode 100644 benchmarks/backwards_ecal/backwards_ecal.org create mode 100644 benchmarks/backwards_ecal/config.yml create mode 100644 benchmarks/backwards_ecal/requirements.txt diff --git a/Snakefile b/Snakefile index c452ba60..f8a02857 100644 --- a/Snakefile +++ b/Snakefile @@ -1,6 +1,7 @@ configfile: "snakemake.yml" include: "benchmarks/backgrounds/Snakefile" +include: "benchmarks/backwards_ecal/Snakefile" include: "benchmarks/barrel_ecal/Snakefile" include: "benchmarks/ecal_gaps/Snakefile" include: "benchmarks/material_scan/Snakefile" diff --git a/benchmarks/backwards_ecal/Snakefile b/benchmarks/backwards_ecal/Snakefile new file mode 100644 index 00000000..843202fe --- /dev/null +++ b/benchmarks/backwards_ecal/Snakefile @@ -0,0 +1,90 @@ +rule backwards_ecal_sim: + input: + steering_file="EPIC/EVGEN/SINGLE/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.steer", + warmup="warmup/{DETECTOR_CONFIG}.edm4hep.root", + output: + "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root", + log: + "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root.log", + wildcard_constraints: + PARTICLE="(e-|pi-)", + ENERGY="[0-9]+[kMG]eV", + PHASE_SPACE="(3to50|45to135|130to177)deg", + INDEX="\d{4}", + params: + N_EVENTS=1000 + shell: + """ +set -m # monitor mode to prevent lingering processes +exec ddsim \ + --runType batch \ + --enableGun \ + --steeringFile "{input.steering_file}" \ + --random.seed 1{wildcards.INDEX} \ + --filter.tracker edep0 \ + -v WARNING \ + --numberOfEvents {params.N_EVENTS} \ + --compactFile $DETECTOR_PATH/{wildcards.DETECTOR_CONFIG}.xml \ + --outputFile {output} +""" + + +rule backwards_ecal_recon: + input: + "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root", + output: + "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root", + log: + "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root.log", + wildcard_constraints: + INDEX="\d{4}", + shell: """ +set -m # monitor mode to prevent lingering processes +exec env DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \ + eicrecon {input} -Ppodio:output_file={output} \ + -Ppodio:output_include_collections=MCParticles,EcalEndcapNRecHits,EcalEndcapNClusters +""" + + +rule backwards_ecal_simulate: + input: + expand( + "sim_output/backwards_ecal/{{DETECTOR_CONFIG}}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root", + PARTICLE=["pi-", "e-"], + ENERGY=[ + "100MeV", + "200MeV", + "500MeV", + "1GeV", + "2GeV", + "5GeV", + "10GeV", + "20GeV", + ], + PHASE_SPACE=["130to177deg"], + INDEX=range(20), + ), + output: + touch("sim_output/backwards_ecal/{DETECTOR_CONFIG}/flag"), + + +DETECTOR_CONFIG=os.environ["DETECTOR_CONFIG"] + +rule backwards_ecal: + input: + sim_output="sim_output/backwards_ecal/" + DETECTOR_CONFIG + "/flag", + matplotlibrc=".matplotlibrc", + script="benchmarks/backwards_ecal/backwards_ecal.py", + output: + directory("results/backwards_ecal") + threads: workflow.cores + shell: + """ +env \ +MATPLOTLIBRC={input.matplotlibrc} \ +DETECTOR_CONFIG=""" + DETECTOR_CONFIG + """ \ +PLOT_TITLE=""" + DETECTOR_CONFIG + """ \ +INPUT_PATH_FORMAT=sim_output/backwards_ecal/""" + DETECTOR_CONFIG + """/{{particle}}/{{energy}}/130to177deg/{{particle}}_{{energy}}_130to177deg.{{ix:04d}}.eicrecon.tree.edm4eic.root \ +OUTPUT_DIR={output} \ +python {input.script} +""" diff --git a/benchmarks/backwards_ecal/backwards_ecal.org b/benchmarks/backwards_ecal/backwards_ecal.org new file mode 100644 index 00000000..d55ff7d4 --- /dev/null +++ b/benchmarks/backwards_ecal/backwards_ecal.org @@ -0,0 +1,200 @@ +#+PROPERTY: header-args:jupyter-python :session /jpy:localhost#8888:backwards_ecal :async yes :results drawer :exports both + +#+TITLE: ePIC EEEMCal benchmark +#+AUTHOR: Dmitry Kalinkin +#+OPTIONS: d:t + +#+LATEX_CLASS_OPTIONS: [9pt,letter] +#+BIND: org-latex-image-default-width "" +#+BIND: org-latex-image-default-option "scale=0.3" +#+BIND: org-latex-images-centered nil +#+BIND: org-latex-minted-options (("breaklines") ("bgcolor" "black!5") ("frame" "single")) +#+LATEX_HEADER: \usepackage[margin=1in]{geometry} +#+LATEX_HEADER: \setlength{\parindent}{0pt} +#+LATEX: \sloppy + +#+begin_src jupyter-python :results silent +import os +from pathlib import Path + +import awkward as ak +import numpy as np +import vector +import uproot +from sklearn.metrics import roc_curve +#+end_src + +#+begin_src jupyter-python +vector.register_awkward() +#+end_src + +* Plotting setup + +#+begin_src jupyter-python :results silent +import matplotlib as mpl +import matplotlib.pyplot as plt + +def setup_presentation_style(): + mpl.rcParams.update(mpl.rcParamsDefault) + plt.style.use('ggplot') + plt.rcParams.update({ + 'axes.labelsize': 8, + 'axes.titlesize': 9, + 'figure.titlesize': 9, + 'figure.figsize': (4, 3), + 'legend.fontsize': 7, + 'xtick.labelsize': 8, + 'ytick.labelsize': 8, + 'pgf.rcfonts': False, + }) + +setup_presentation_style() +#+end_src + +* Parameters + +#+begin_src jupyter-python :results silent +DETECTOR_CONFIG=os.environ.get("DETECTOR_CONFIG") +PLOT_TITLE=os.environ.get("PLOT_TITLE") +INPUT_PATH_FORMAT=os.environ.get("INPUT_PATH_FORMAT", "EPIC/RECO/24.04.0/epic_craterlake/SINGLE/{particle}/{energy}/130to177deg/{particle}_{energy}_130to177deg.{ix:04d}.eicrecon.tree.edm4eic.root") +INPUT_PATH_INDEX_RANGE=list(range(20)) + +output_dir=Path(os.environ.get("OUTPUT_DIR", "./")) +output_dir.mkdir(parents=True, exist_ok=True) +#+end_src + +* Analysis + +First, we define a requirement on what phase we will consider for our +analysis. The following function filters single-particle events that +are thrown within $-3.5 < \eta < -2.0$: + +#+begin_src jupyter-python +def filter_pointing(events): + part_momentum = ak.zip({ + "m": events["MCParticles.mass"], + "px": events["MCParticles.momentum.x"], + "py": events["MCParticles.momentum.y"], + "pz": events["MCParticles.momentum.z"], + }, with_name="Momentum4D") + cond = (part_momentum.eta[:,0] > -3.5) & (part_momentum.eta[:,0] < -2.) + return events[cond] +#+end_src + +#+begin_src jupyter-python +energies = [ + "100MeV", + "200MeV", + "500MeV", + "1GeV", + "2GeV", + "5GeV", + "10GeV", + "20GeV", +] + +filter_name = [ + "MCParticles.*", + "*EcalEndcapNRecHits*", + "*EcalEndcapNClusters*", +] + +pi_eval = {} +e_eval = {} + +for energy in energies: + pi_eval[energy] = filter_pointing(uproot.concatenate( + {INPUT_PATH_FORMAT.format(particle="pi-", energy=energy, ix=ix): "events" for ix in INPUT_PATH_INDEX_RANGE}, + filter_name=filter_name, + )) + e_eval[energy] = filter_pointing(uproot.concatenate( + {INPUT_PATH_FORMAT.format(particle="e-", energy=energy, ix=ix): "events" for ix in INPUT_PATH_INDEX_RANGE}, + filter_name=filter_name, + )) +#+end_src + +** Pion rejection + +#+begin_src jupyter-python +fig, axs = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6)) +fig_log, axs_log = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6)) +fig_roc, axs_roc = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6)) + +fig.suptitle(PLOT_TITLE) +fig_log.suptitle(PLOT_TITLE) +fig_roc.suptitle(PLOT_TITLE) + +axs = np.ravel(np.array(axs)) +axs_log = np.ravel(np.array(axs_log)) +axs_roc = np.ravel(np.array(axs_roc)) + +rocs = {} + +for ix, energy in enumerate(energies): + for use_clusters in [False, True]: + energy_value = float(energy.replace("GeV", "").replace("MeV", "e-3")) + if use_clusters: + clf_label = "leading cluster" + else: + clf_label = "sum all hits" + def clf(events): + if use_clusters: + return ak.drop_none(ak.max(events["EcalEndcapNClusters.energy"], axis=-1)) / energy_value + else: + return ak.sum(events["EcalEndcapNRecHits.energy"], axis=-1) / energy_value + e_pred = clf(e_eval[energy]) + pi_pred = clf(pi_eval[energy]) + + for do_log, ax in [(False, axs[ix]), (True, axs_log[ix])]: + plt.sca(ax) + plt.hist(e_pred, bins=np.linspace(0., 1.01, 101), label=rf"$e^-$ {clf_label}", hatch=None if use_clusters else r"xxx", alpha=0.8 if use_clusters else 1.) + plt.hist(pi_pred, bins=np.linspace(0., 1.01, 101), label=rf"$\pi^-$ {clf_label}", histtype="step") + plt.title(f"{energy}") + plt.legend() + plt.xlabel("Classifier output", loc="right") + plt.ylabel("Number of events", loc="top") + if do_log: + plt.yscale("log") + + plt.sca(axs_roc[ix]) + fpr, tpr, _ = roc_curve( + np.concatenate([np.ones_like(e_pred), np.zeros_like(pi_pred)]), + np.concatenate([e_pred, pi_pred]), + ) + cond = fpr != 0 # avoid infinite rejection (region of large uncertainty) + cond &= tpr != 1 # avoid linear interpolation (region of large uncertainty) + def mk_interp(tpr, fpr): + def interp(eff): + return np.interp(eff, tpr, fpr) + return interp + rocs.setdefault(clf_label, {})[energy] = mk_interp(tpr, fpr) + plt.plot(tpr[cond] * 100, 1 / fpr[cond], label=f"{clf_label}") + plt.yscale("log") + plt.title(f"{energy}") + plt.legend(loc="lower left") + plt.xlabel("Electron efficiency, %", loc="right") + plt.ylabel("Pion rejection factor", loc="top") + +fig.savefig(output_dir / f"pred.pdf", bbox_inches="tight") +plt.close(fig) +fig_log.savefig(output_dir / f"pred_log.pdf", bbox_inches="tight") +fig_log.show() +fig_roc.savefig(output_dir / f"roc.pdf", bbox_inches="tight") +fig_roc.show() + +plt.figure() +for clf_label, roc in rocs.items(): + plt.plot( + [float(energy.replace("GeV", "").replace("MeV", "e-3")) for energy in energies], + [1 / roc[energy](0.95) for energy in energies], + marker=".", + label=f"{clf_label}", + ) +plt.yscale("log") +plt.title(INPUT_PATH_FORMAT) +plt.legend() +plt.xlabel("Energy, GeV", loc="right") +plt.ylabel("Pion rejection at 95%", loc="top") +plt.savefig(output_dir / f"pion_rej.pdf", bbox_inches="tight") +plt.show() +#+end_src diff --git a/benchmarks/backwards_ecal/config.yml b/benchmarks/backwards_ecal/config.yml new file mode 100644 index 00000000..9d36dc57 --- /dev/null +++ b/benchmarks/backwards_ecal/config.yml @@ -0,0 +1,28 @@ +sim:backwards_ecal: + extends: .det_benchmark + stage: simulate + script: + - mkdir -p $LOCAL_DATA_PATH/input + - ln -s $LOCAL_DATA_PATH/input input + - | + snakemake --cores 5 \ + sim_output/backwards_ecal/epic_craterlake + +bench:backwards_ecal_emcal_backwards: + extends: .det_benchmark + stage: benchmarks + needs: + - ["sim:backwards_ecal"] + script: + - ln -s $LOCAL_DATA_PATH/input input + - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps + - pip install -r benchmarks/backwards_ecal/requirements.txt + - snakemake --cores 8 backwards_ecal_ecal_backwards + +collect_results:backwards_ecal: + extends: .det_benchmark + stage: collect + needs: + - "bench:backwards_ecal_emcal_backwards" + script: + - ls -lrht diff --git a/benchmarks/backwards_ecal/requirements.txt b/benchmarks/backwards_ecal/requirements.txt new file mode 100644 index 00000000..3d0f6336 --- /dev/null +++ b/benchmarks/backwards_ecal/requirements.txt @@ -0,0 +1,4 @@ +awkward >= 2.4.0 +scikit-learn +uproot >= 5.2.0 +vector