Merge branch 'main' of github.com:WMass/WRemnants into addHistAndBugFix

Merging from WMass/main
WMass · Oct 4, 2024 · 7d12040 · 7d12040
2 parents a5c7a76 + d823d98
commit 7d12040
Show file tree

Hide file tree

Showing 26 changed files with 646 additions and 304 deletions.
diff --git a/.github/workflows/unfolding.yml b/.github/workflows/unfolding.yml
@@ -412,14 +412,14 @@ jobs:
         # run with a reduced binning
         if: github.event_name == 'pull_request' || github.event.schedule == '30 5 * * 1-5'
         run: >-
-          scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH --excludeFlow
+          scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH
           --analysisMode unfolding -j $((NTHREADS)) --maxFiles $((MAX_FILES)) --forceDefaultName -o $WREMNANTS_OUTDIR --axes ptll yll --postfix unfolding --genAxes ptVGen
 
       - name: dilepton analysis
         # run with full binning
         if: github.event_name != 'pull_request' && github.event.schedule != '30 5 * * 1-5'
         run: >-
-          scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH --excludeFlow
+          scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH
           --analysisMode unfolding -j $((NTHREADS)) --maxFiles $((MAX_FILES)) --forceDefaultName -o $WREMNANTS_OUTDIR --axes ptll yll --postfix unfolding
 
 

diff --git a/narf b/narf
diff --git a/scripts/ci/run_with_singularity.sh b/scripts/ci/run_with_singularity.sh
@@ -3,7 +3,7 @@ export APPTAINER_BIND="/scratch,/cvmfs"
 if [[ -d $WREM_BASE ]]; then
     export APPTAINER_BIND="${APPTAINER_BIND},${WREM_BASE}/.."
 fi
-CONTAINER=/cvmfs/unpacked.cern.ch/gitlab-registry.cern.ch/bendavid/cmswmassdocker/wmassdevrolling\:v30
+CONTAINER=/cvmfs/unpacked.cern.ch/gitlab-registry.cern.ch/bendavid/cmswmassdocker/wmassdevrolling\:v38
 
 # Ensure kerberos permissions for eos access (requires systemd kerberos setup)
 if [ -d $XDG_RUNTIME_DIR/krb5cc ]; then

diff --git a/scripts/histmakers/mw_lowPU.py b/scripts/histmakers/mw_lowPU.py
@@ -6,7 +6,7 @@
 
 analysis_label = Datagroups.analysisLabel(os.path.basename(__file__))
 parser,initargs = common.common_parser(analysis_label)
-parser.add_argument("--lumiUncertainty", type=float, help="Uncertainty for luminosity in excess to 1 (e.g. 1.017 means 1.7\%)", default=1.017)
+parser.add_argument("--lumiUncertainty", type=float, help=r"Uncertainty for luminosity in excess to 1 (e.g. 1.017 means 1.7%)", default=1.017)
 parser.add_argument("--noGenMatchMC", action='store_true', help="Don't use gen match filter for prompt muons with MC samples (note: QCD MC never has it anyway)")
 parser.add_argument("--flavor", type=str, choices=["e", "mu"], help="Flavor (e or mu)", default="mu")
 

diff --git a/scripts/histmakers/mw_with_mu_eta_pt.py b/scripts/histmakers/mw_with_mu_eta_pt.py
@@ -24,7 +24,7 @@
 import pathlib
 import numpy as np
 
-parser.add_argument("--lumiUncertainty", type=float, help="Uncertainty for luminosity in excess to 1 (e.g. 1.012 means 1.2\%)", default=1.012)
+parser.add_argument("--lumiUncertainty", type=float, help=r"Uncertainty for luminosity in excess to 1 (e.g. 1.012 means 1.2%)", default=1.012)
 parser.add_argument("--noGenMatchMC", action='store_true', help="Don't use gen match filter for prompt muons with MC samples (note: QCD MC never has it anyway)")
 parser.add_argument("--halfStat", action='store_true', help="Test half data and MC stat, selecting odd events, just for tests")
 parser.add_argument("--makeMCefficiency", action="store_true", help="Save yields vs eta-pt-ut-passMT-passIso-passTrigger to derive 3D efficiencies for MC isolation and trigger (can run also with --onlyMainHistograms)")
@@ -68,7 +68,6 @@
 isFloatingPOIsTheoryAgnostic = isTheoryAgnostic and not isPoiAsNoi
 
 if isUnfolding or isTheoryAgnostic:
-    parser = common.set_parser_default(parser, "excludeFlow", True)
     if isTheoryAgnostic:
         if args.genAbsYVbinEdges and any(x < 0.0 for x in args.genAbsYVbinEdges):
             raise ValueError("Option --genAbsYVbinEdges requires all positive values. Please check")
@@ -78,7 +77,7 @@
 
 # axes for W MC efficiencies with uT dependence for iso and trigger
 axis_pt_eff_list = [24.,26.,28.,30.,32.,34.,36.,38.,40., 42., 44., 47., 50., 55., 60., 65.]
-axis_pt_eff = hist.axis.Variable(axis_pt_eff_list, name = "pt", overflow=not args.excludeFlow, underflow=not args.excludeFlow)
+axis_pt_eff = hist.axis.Variable(axis_pt_eff_list, name = "pt", overflow=False, underflow=False)
 if args.makeMCefficiency:
     # override the pt cuts (the binning is irrelevant since a different pt axis is used)
     nbinsPtEff = axis_pt_eff_list[-1] - axis_pt_eff_list[0]

diff --git a/scripts/histmakers/mz_dilepton.py b/scripts/histmakers/mz_dilepton.py
@@ -65,15 +65,14 @@
 
 # available axes for dilepton validation plots
 all_axes = {
-    # "mll": hist.axis.Regular(60, 60., 120., name = "mll", overflow=not args.excludeFlow, underflow=not args.excludeFlow),
-    "mll": hist.axis.Variable([60,70,75,78,80,82,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,100,102,105,110,120], name = "mll", overflow=not args.excludeFlow, underflow=not args.excludeFlow),
-    "yll": hist.axis.Regular(20, -2.5, 2.5, name = "yll", overflow=not args.excludeFlow, underflow=not args.excludeFlow),
-    "absYll": hist.axis.Regular(10, 0., 2.5, name = "absYll", underflow=False, overflow=not args.excludeFlow),
-    "ptll": hist.axis.Variable(dilepton_ptV_binning, name = "ptll", underflow=False, overflow=not args.excludeFlow),
+    "mll": hist.axis.Variable([60,70,75,78,80,82,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,100,102,105,110,120], name = "mll"),
+    "yll": hist.axis.Regular(20, -2.5, 2.5, name = "yll"),
+    "absYll": hist.axis.Regular(10, 0., 2.5, name = "absYll", underflow=False),
+    "ptll": hist.axis.Variable(dilepton_ptV_binning, name = "ptll", underflow=False),
     "etaPlus": hist.axis.Variable([-2.4,-1.2,-0.3,0.3,1.2,2.4], name = "etaPlus"),
     "etaMinus": hist.axis.Variable([-2.4,-1.2,-0.3,0.3,1.2,2.4], name = "etaMinus"),
-    "etaRegionSign": hist.axis.Regular(3, 0, 3, name = "etaRegionSign"),
-    "etaRegionRange": hist.axis.Regular(3, 0, 3, name = "etaRegionRange"),
+    "etaRegionSign": hist.axis.Regular(3, 0, 3, name = "etaRegionSign", underflow=False, overflow=False),
+    "etaRegionRange": hist.axis.Regular(3, 0, 3, name = "etaRegionRange", underflow=False, overflow=False),
     "absEtaPlus": hist.axis.Regular(8, 0, 2.4, name = "absEtaPlus"),
     "absEtaMinus": hist.axis.Regular(8, 0, 2.4, name = "absEtaMinus"),
     "etaAbsEta": hist.axis.Variable([-2.4, -2.0, -1.6, -1.4, -1.2, -1.0, -0.6, 0.0, 0.6, 1.0, 1.2, 1.4, 1.6, 2.0, 2.4], name = "etaAbsEta"),
@@ -84,12 +83,12 @@
     "cosThetaStarll": hist.axis.Regular(20, -1., 1., name = "cosThetaStarll", underflow=False, overflow=False),
     "phiStarll": hist.axis.Regular(20, -math.pi, math.pi, circular = True, name = "phiStarll"),
     #"charge": hist.axis.Regular(2, -2., 2., underflow=False, overflow=False, name = "charge") # categorical axes in python bindings always have an overflow bin, so use a regular
-    "massVgen": hist.axis.Variable(ewMassBins, name = "massVgen", overflow=not args.excludeFlow, underflow=not args.excludeFlow),
-    "ewMll": hist.axis.Variable(ewMassBins, name = "ewMll", overflow=not args.excludeFlow, underflow=not args.excludeFlow),
-    "ewMlly": hist.axis.Variable(ewMassBins, name = "ewMlly", overflow=not args.excludeFlow, underflow=not args.excludeFlow),
-    "ewLogDeltaM": hist.axis.Regular(100, -10, 4, name = "ewLogDeltaM", overflow=not args.excludeFlow, underflow=not args.excludeFlow),
-    "trigMuons_abseta0" : hist.axis.Regular(3, 0., 2.4, name = "trigMuons_abseta0", overflow=not args.excludeFlow, underflow=not args.excludeFlow),
-    "nonTrigMuons_eta0" : hist.axis.Regular(int(args.eta[0]), args.eta[1], args.eta[2], name = "nonTrigMuons_eta0", overflow=not args.excludeFlow, underflow=not args.excludeFlow),
+    "massVgen": hist.axis.Variable(ewMassBins, name = "massVgen"),
+    "ewMll": hist.axis.Variable(ewMassBins, name = "ewMll"),
+    "ewMlly": hist.axis.Variable(ewMassBins, name = "ewMlly"),
+    "ewLogDeltaM": hist.axis.Regular(100, -10, 4, name = "ewLogDeltaM"),
+    "trigMuons_abseta0" : hist.axis.Regular(3, 0., 2.4, name = "trigMuons_abseta0", underflow=False),
+    "nonTrigMuons_eta0" : hist.axis.Regular(int(args.eta[0]), args.eta[1], args.eta[2], name = "nonTrigMuons_eta0"),
     "nonTrigMuons_pt0" : hist.axis.Regular(int(args.pt[0]), args.pt[1], args.pt[2], name = "nonTrigMuons_pt0"),
     "nonTrigMuons_charge0" : hist.axis.Regular(2, -2., 2., underflow=False, overflow=False, name = "nonTrigMuons_charge0"),
 }
@@ -103,7 +102,7 @@
     all_axes["cosThetaStarll"] = hist.axis.Variable([-1, -0.56, -0.375, -0.19, 0., 0.19, 0.375, 0.56, 1.], name = "cosThetaStarll", underflow=False, overflow=False)
     all_axes["phiStarll"] = hist.axis.Variable([-math.pi, -2.27, -1.57, -0.87, 0, 0.87, 1.57, 2.27, math.pi], name = "phiStarll", underflow=False, overflow=False) 
     # 10 quantiles
-    all_axes["yll"] = hist.axis.Variable([-2.5, -1.5, -1.1, -0.7, -0.35, 0, 0.35, 0.7, 1.1, 1.5, 2.5], name = "yll", underflow=not args.excludeFlow, overflow=not args.excludeFlow)
+    all_axes["yll"] = hist.axis.Variable([-2.5, -1.5, -1.1, -0.7, -0.35, 0, 0.35, 0.7, 1.1, 1.5, 2.5], name = "yll")
 
     unfolding_axes, unfolding_cols, unfolding_selections = differential.get_dilepton_axes(
         args.genAxes, 
@@ -251,16 +250,16 @@ def build_graph(df, dataset):
                 axes = [*nominal_axes, *unfolding_axes] 
                 cols = [*nominal_cols, *unfolding_cols]
 
-    if not args.noAuxiliaryHistograms and isZ:
+    if not args.noAuxiliaryHistograms and isZ and len(auxiliary_gen_axes):
         # gen level variables before selection
-        for obs in auxiliary_gen_axes:
-            df_gen = df
-            df_gen = df_gen.DefinePerSample("exp_weight", "1.0")
-
-            df_gen = theory_tools.define_theory_weights_and_corrs(df_gen, dataset.name, corr_helpers, args)
+        df_gen = df
+        df_gen = df_gen.DefinePerSample("exp_weight", "1.0")
+        df_gen = theory_tools.define_theory_weights_and_corrs(df_gen, dataset.name, corr_helpers, args)
 
+        for obs in auxiliary_gen_axes:
             results.append(df_gen.HistoBoost(f"gen_{obs}", [all_axes[obs]], [obs, "nominal_weight"]))
-            df_gen = syst_tools.add_theory_hists(results, df_gen, args, dataset.name, corr_helpers, qcdScaleByHelicity_helper, [all_axes[obs]], [obs], base_name=f"gen_{obs}", for_wmass=False)
+            syst_tools.add_theory_hists(results, df_gen, args, dataset.name, corr_helpers, qcdScaleByHelicity_helper, [all_axes[obs]], [obs], base_name=f"gen_{obs}", for_wmass=False)
+
     df = df.Filter(muon_selections.hlt_string(era))
 
     df = muon_selections.veto_electrons(df)
@@ -435,13 +434,14 @@ def build_graph(df, dataset):
         else:
             results.append(df.HistoBoost(noiAsPoiHistName, [*nominal_axes, *unfolding_axes], [*nominal_cols, *unfolding_cols, "nominal_weight"]))  
 
-    for obs in ["ptll", "mll", "yll", "cosThetaStarll", "phiStarll", "etaPlus", "etaMinus", "ptPlus", "ptMinus"]:
-        if dataset.is_data:
-            results.append(df.HistoBoost(f"nominal_{obs}", [all_axes[obs]], [obs]))
-        else:
-            results.append(df.HistoBoost(f"nominal_{obs}", [all_axes[obs]], [obs, "nominal_weight"]))
-            if isWorZ:
-                df = syst_tools.add_theory_hists(results, df, args, dataset.name, corr_helpers, qcdScaleByHelicity_helper, [all_axes[obs]], [obs], base_name=f"nominal_{obs}", for_wmass=False)
+    if not args.noAuxiliaryHistograms:
+        for obs in ["ptll", "mll", "yll", "cosThetaStarll", "phiStarll", "etaPlus", "etaMinus", "ptPlus", "ptMinus"]:
+            if dataset.is_data:
+                results.append(df.HistoBoost(f"nominal_{obs}", [all_axes[obs]], [obs]))
+            else:
+                results.append(df.HistoBoost(f"nominal_{obs}", [all_axes[obs]], [obs, "nominal_weight"]))
+                if isWorZ:
+                    df = syst_tools.add_theory_hists(results, df, args, dataset.name, corr_helpers, qcdScaleByHelicity_helper, [all_axes[obs]], [obs], base_name=f"nominal_{obs}", for_wmass=False)
 
     if not args.noAuxiliaryHistograms and isZ:
         # gen level variables
@@ -659,7 +659,7 @@ def build_graph(df, dataset):
     return results, weightsum
 
 logger.debug(f"Datasets are {[d.name for d in datasets]}")
-resultdict = narf.build_and_run(datasets, build_graph)
+resultdict = narf.build_and_run(datasets[::-1], build_graph)
 
 if not args.noScaleToData:
     scale_to_data(resultdict)

diff --git a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py
@@ -48,7 +48,6 @@
     parser = common.set_parser_default(parser, "muonIsolation", [0, 1])
 
 if isTheoryAgnostic:
-    parser = common.set_parser_default(parser, "excludeFlow", True)
     if args.genAbsYVbinEdges and any(x < 0.0 for x in args.genAbsYVbinEdges):
         raise ValueError("Option --genAbsYVbinEdges requires all positive values. Please check")
 

diff --git a/scripts/histmakers/w_z_gen_dists.py b/scripts/histmakers/w_z_gen_dists.py
@@ -93,8 +93,15 @@ def build_graph(df, dataset):
             common.get_gen_axes(common.get_dilepton_ptV_binning(), True, flow=True), 
             add_out_of_acceptance_axis=False,
         )
-        axis_ptVgen = unfolding_axes[0]
-        axis_absYVgen = unfolding_axes[1]
+        axis_absYVgen = hist.axis.Variable(
+            unfolding_axes[1].edges,
+            name = "absYVgen", underflow=False,
+        )
+        axis_ptVgen = hist.axis.Variable(
+            unfolding_axes[0].edges,
+            name = "ptVgen", underflow=False,
+        )
+
         axis_massZgen = hist.axis.Regular(1, 60., 120., name="massVgen")
 
     elif args.useTheoryAgnosticBinning:
@@ -150,7 +157,7 @@ def build_graph(df, dataset):
             mode += "_wlike"
 
         df = unfolding_tools.define_gen_level(df, "preFSR", dataset.name, mode=mode)
-        df = unfolding_tools.select_fiducial_space(df, mode=mode, fiducial=args.fiducial, unfolding=True)
+        df = unfolding_tools.select_fiducial_space(df, mode=mode, fiducial=args.fiducial, unfolding=True, selections=unfolding_selections)
 
     if args.singleLeptonHists and (isW or isZ):
         results.append(df.HistoBoost("nominal_genlep", lep_axes, [*lep_cols, "nominal_weight"], storage=hist.storage.Weight()))

diff --git a/scripts/plotting/makeDataMCStackPlot.py b/scripts/plotting/makeDataMCStackPlot.py
@@ -50,6 +50,7 @@
 parser.add_argument("--fakerateAxes", nargs="+", help="Axes for the fakerate binning", default=["eta","pt","charge"])
 parser.add_argument("--fineGroups", action='store_true', help="Plot each group as a separate process, otherwise combine groups based on predefined dictionary")
 parser.add_argument("--subplotSizes", nargs=2, type=int, default=[4,2], help="Relative sizes for upper and lower panels")
+parser.add_argument("--scaleRatioUnstacked", nargs='*', type=float, default=[], help="Scale a variation by this factor")
 subparsers = parser.add_subparsers(dest="variation")
 variation = subparsers.add_parser("variation", help="Arguments for adding variation hists")
 variation.add_argument("--varName", type=str, nargs='+', required=True, help="Name of variation hist")
@@ -59,6 +60,7 @@
 variation.add_argument("--colors", type=str, nargs='+', help="Variation colors")
 variation.add_argument("--linestyle", type=str, default=[], nargs='+', help="Linestyle for variations")
 variation.add_argument("--doubleColors", action='store_true', help="Auto generate colors in pairs (useful for systematics)")
+variation.add_argument("--doubleLines", action='store_true', help="Auto generate colors in pairs (useful for systematics)")
 variation.add_argument("--fillBetween", type=int, help="Fill between first n variation hists in ratio")
 variation.add_argument("--lowerPanelVariations", type=int, default=0, help="Plot n first variations in lower panel only")
 
@@ -183,7 +185,7 @@ def padArray(ref, matchLength):
     # If none matplotlib will pick a random color
     ncols = len(args.varName) if not args.doubleColors else int(len(args.varName)/2)
     colors = args.colors if args.colors else [colormaps["tab10" if ncols < 10 else "tab20"](int(i/2) if args.doubleColors else i) for i in range(len(args.varName))]
-    for i, (label,name,color) in enumerate(zip(varLabels,args.varName,colors)):
+    for i, (label,name,color) in enumerate(zip(varLabels, args.varName,colors)):
         entry = entries[i] if entries else None
         do_transform = entry in transforms
         name = name if name != "" else nominalName
@@ -242,7 +244,7 @@ def collapseSyst(h):
 
 overflow_ax = ["ptll", "chargeVgen", "massVgen", "ptVgen", "absEtaGen", "ptGen", "ptVGen", "absYVGen", "iso", "dxy", "met","mt"]
 for h in args.hists:
-    if any(x in h.split("-") for x in ["ptll", "mll", "ptVgen", "ptVGen"]):
+    if any(x in h.split("-") for x in ["pt", "ptll", "mll", "ptVgen", "ptVGen", "ptWgen", "ptZgen"]):
         # in case of variable bin width normalize to unit (which is GeV for all of these...)
         binwnorm = 1.0
         ylabel="$Events\,/\,GeV$"
@@ -251,39 +253,51 @@ def collapseSyst(h):
         ylabel="$Events\,/\,bin$"
 
     if args.rlabel is None:
-        rlabel = "$Pred.\,/\,Data$" if args.ratioToData else "$Data\,/\,Pred.$"
+        if args.noData:
+            rlabel = "Ratio to nominal"
+        elif args.ratioToData:
+            rlabel = "$Pred.\,/\,Data$"
+        else:
+            rlabel = "$Data\,/\,Pred.$"
     else:
         rlabel = args.rlabel
+
     if len(h.split("-")) > 1:
         sp = h.split("-")
         action = lambda x: hh.unrolledHist(collapseSyst(x[select]), binwnorm=binwnorm, obs=sp)
         xlabel=f"({', '.join([styles.xlabels.get(s,s).replace('(GeV)','') for s in sp])}) bin"
     else:
         action = lambda x: hh.projectNoFlow(collapseSyst(x[select]), h, overflow_ax)
-        xlabel=styles.xlabels.get(h,h)
+        print(prednames)
+        href = h if h != "ptVgen" else ("ptWgen" if "Wmunu" in prednames else "ptZgen")
+        xlabel=styles.xlabels.get(href,href)
+
+
     fig = plot_tools.makeStackPlotWithRatio(histInfo, prednames, histName=args.baseName, ylim=args.ylim, yscale=args.yscale, logy=args.logy,
             fill_between=args.fillBetween if hasattr(args, "fillBetween") else None, 
             lower_panel_variations=args.lowerPanelVariations if hasattr(args, "lowerPanelVariations") else 0,
+            scaleRatioUnstacked=args.scaleRatioUnstacked,
             action=action, unstacked=unstack, 
             fitresult=args.fitresult, prefit=args.prefit,
             xlabel=xlabel, ylabel=ylabel, rrange=args.rrange, binwnorm=binwnorm, lumi=groups.lumi,
             ratio_to_data=args.ratioToData, rlabel=rlabel,
             xlim=args.xlim, no_fill=args.noFill, no_stack=args.noStack, no_ratio=args.noRatio, density=args.density, flow=args.flow,
-            cms_decor=args.cmsDecor, legtext_size=args.legSize, nlegcols=args.legCols, unstacked_linestyles=args.linestyle if hasattr(args, "linestyle") else [],
+            cms_decor=args.cmsDecor, legtext_size=args.legSize, nlegcols=args.legCols, 
+            unstacked_linestyles=args.linestyle if hasattr(args, "linestyle") else [], double_lines=args.doubleLines if hasattr(args, "doubleLines") else False,
             ratio_error=args.ratioError, normalize_to_data=args.normToData, noSci=args.noSciy, logoPos=args.logoPos, 
             width_scale=1.25 if len(h.split("-")) == 1 else 1, lowerLegCols=args.lowerLegCols, lowerLegPos=args.lowerLegPos,
             subplotsizes=args.subplotSizes,
             )
 
-    fitresultstring=""
-    if args.fitresult:
-        fitresultstring = "prefit" if args.prefit else "postfit"
-    var_arg = None
+    to_join = [f"{h.replace('-','_')}"]
     if "varName" in args and args.varName:
         var_arg = args.varName[0]
         if "selectEntries" in args and args.selectEntries:
             var_arg = args.selectEntries[0] if not args.selectEntries[0].isdigit() else (var_arg+args.selectEntries[0])
-    to_join = [f"{h.replace('-','_')}"]+[var_arg]+[fitresultstring, args.postfix]+[args.channel.replace("all", "")]
+        to_join.append(var_arg)
+    if args.fitresult:
+        to_join.append("prefit" if args.prefit else "postfit")
+    to_join.extend([args.postfix, args.channel.replace("all", "")])
     outfile = "_".join(filter(lambda x: x, to_join))
     if args.cmsDecor == "Preliminary":
         outfile += "_preliminary"