From 61ea84a7cc83b1ea154f8150b4d377bdbd80c011 Mon Sep 17 00:00:00 2001 From: Davide Bruschini Date: Thu, 1 Aug 2024 20:47:24 +0200 Subject: [PATCH 1/2] Changes in the implementation of MiNNLO muRmuFPolVar (and corrections to use them for unfolding and theory agnostic) --- scripts/combine/setupCombine.py | 11 ++- scripts/histmakers/mw_with_mu_eta_pt.py | 7 +- scripts/histmakers/mz_wlike_with_mu_eta_pt.py | 5 +- utilities/common.py | 2 +- wremnants-data | 2 +- wremnants/combine_theoryAgnostic_helper.py | 82 +++++++++++-------- wremnants/combine_theory_helper.py | 3 +- wremnants/syst_tools.py | 18 ++++ 8 files changed, 86 insertions(+), 44 deletions(-) diff --git a/scripts/combine/setupCombine.py b/scripts/combine/setupCombine.py index 0d61f0d1e..a1d8ac0cb 100644 --- a/scripts/combine/setupCombine.py +++ b/scripts/combine/setupCombine.py @@ -137,6 +137,7 @@ def make_parser(parser=None): parser.add_argument("--forceConstrainMass", action='store_true', help="force mass to be constrained in fit") parser.add_argument("--decorMassWidth", action='store_true', help="remove width variations from mass variations") parser.add_argument("--muRmuFPolVar", action="store_true", help="Use polynomial variations (like in theoryAgnosticPolVar) instead of binned variations for muR and muF (of course in setupCombine these are still constrained nuisances)") + parser.add_argument("--muRmuFFullyCorr", action="store_true", help="Use the fully correlated muR/muF variations derived from theoryAgnosticPolVar polynomial variations instead of the binned ones (by default you take the binned fully correlated variations even if you are using the polynomial variations for muR/muF, they just take care of the \"uncorrelated\" part) (this option only works with --muRmuFPolVar, it doesn't work on its own)") parser = make_subparsers(parser) return parser @@ -397,6 +398,9 @@ def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, genvar=None, x def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]): return any(name.startswith(init) for init in startsWith) and all(excl not in name for excl in excludeMatch) + def assertSampleWithOOA(name, startsWith=["W", "Z"], excludeMatch=[]): + return any(name.startswith(init) for init in startsWith) and name.endswith("OOA") and all(excl not in name for excl in excludeMatch) + dibosonMatch = ["WW", "WZ", "ZZ"] WMatch = ["W"] # TODO: the name of out-of-acceptance might be changed at some point, maybe to WmunuOutAcc, so W will match it as well (and can exclude it using "OutAcc" if needed) ZMatch = ["Z"] @@ -422,6 +426,11 @@ def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]): cardTool.addProcessGroup("nonsignal_samples_noOutAcc", lambda x: assertSample(x, startsWith=nonSignalMatch, excludeMatch=[*dibosonMatch, "tau", "OOA"])) cardTool.addProcessGroup("signal_samples_inctau_noOutAcc", lambda x: assertSample(x, startsWith=signalMatch, excludeMatch=[*dibosonMatch, "OOA"])) cardTool.addProcessGroup("nonsignal_samples_inctau_noOutAcc", lambda x: assertSample(x, startsWith=nonSignalMatch, excludeMatch=[*dibosonMatch, "OOA"])) + # FIX for theoryAgnosticPolVar when using MiNNLO polynomial variations (we would still need these for the OOA) + cardTool.addProcessGroup("signal_samples_OOA", lambda x: assertSampleWithOOA(x, startsWith=signalMatch, excludeMatch=[*dibosonMatch, "tau"])) + cardTool.addProcessGroup("nonsignal_samples_OOA", lambda x: assertSampleWithOOA(x, startsWith=nonSignalMatch, excludeMatch=[*dibosonMatch, "tau"])) + 
cardTool.addProcessGroup("signal_samples_inctau_OOA", lambda x: assertSampleWithOOA(x, startsWith=signalMatch, excludeMatch=[*dibosonMatch])) + cardTool.addProcessGroup("nonsignal_samples_inctau_OOA", lambda x: assertSampleWithOOA(x, startsWith=nonSignalMatch, excludeMatch=[*dibosonMatch])) if not (isTheoryAgnostic or isUnfolding) : logger.info(f"All MC processes {cardTool.procGroups['MCnoQCD']}") @@ -582,7 +591,7 @@ def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]): for g in cardTool.procGroups["signal_samples"] for m in cardTool.datagroups.groups[g].members}, ) - if args.muRmuFPolVar and not isTheoryAgnosticPolVar: + if args.muRmuFPolVar and not isPoiAsNoi: muRmuFPolVar_helper = combine_theoryAgnostic_helper.TheoryAgnosticHelper(cardTool, externalArgs=args) muRmuFPolVar_helper.configure_polVar(label, passSystToFakes, diff --git a/scripts/histmakers/mw_with_mu_eta_pt.py b/scripts/histmakers/mw_with_mu_eta_pt.py index 0720619d8..aa9f23613 100644 --- a/scripts/histmakers/mw_with_mu_eta_pt.py +++ b/scripts/histmakers/mw_with_mu_eta_pt.py @@ -635,23 +635,20 @@ def build_graph(df, dataset): # End graph here only for standard theory agnostic analysis, otherwise use same loop as traditional analysis return results, weightsum - if isWorZ and not hasattr(dataset, "out_of_acceptance"): + if isWorZ: #this needs to be created also for out-of-acceptance, when present (e.g. unfolding, theory-agnostic, etc.) theoryAgnostic_helpers_cols = ["qtOverQ", "absYVgen", "chargeVgen", "csSineCosThetaPhigen", "nominal_weight"] # assume to have same coeffs for plus and minus (no reason for it not to be the case) if dataset.name == "WplusmunuPostVFP" or dataset.name == "WplustaunuPostVFP": helpers_class = muRmuFPolVar_helpers_plus - process_name = "W" elif dataset.name == "WminusmunuPostVFP" or dataset.name == "WminustaunuPostVFP": helpers_class = muRmuFPolVar_helpers_minus - process_name = "W" elif dataset.name == "ZmumuPostVFP" or dataset.name == "ZtautauPostVFP": helpers_class = muRmuFPolVar_helpers_Z - process_name = "Z" for coeffKey in helpers_class.keys(): logger.debug(f"Creating muR/muF histograms with polynomial variations for {coeffKey}") helperQ = helpers_class[coeffKey] df = df.Define(f"muRmuFPolVar_{coeffKey}_tensor", helperQ, theoryAgnostic_helpers_cols) - noiAsPoiWithPolHistName = Datagroups.histName("nominal", syst=f"muRmuFPolVar{process_name}_{coeffKey}") + noiAsPoiWithPolHistName = Datagroups.histName("nominal", syst=f"muRmuFPolVar_{coeffKey}") results.append(df.HistoBoost(noiAsPoiWithPolHistName, nominal_axes, [*nominal_cols, f"muRmuFPolVar_{coeffKey}_tensor"], tensor_axes=helperQ.tensor_axes, storage=hist.storage.Double())) if not args.onlyMainHistograms: diff --git a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py index 5c17630cb..ef0b29933 100644 --- a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py +++ b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py @@ -329,17 +329,16 @@ def build_graph(df, dataset): nominal = df.HistoBoost("nominal", axes, [*cols, "nominal_weight"]) results.append(nominal) - if isZ and not hasattr(dataset, "out_of_acceptance"): + if isZ: #this needs to be created also for out-of-acceptance, when present (e.g. unfolding, theory-agnostic, etc.) 
theoryAgnostic_helpers_cols = ["qtOverQ", "absYVgen", "chargeVgen", "csSineCosThetaPhigen", "nominal_weight"] # assume to have same coeffs for plus and minus (no reason for it not to be the case) if dataset.name == "ZmumuPostVFP" or dataset.name == "ZtautauPostVFP": helpers_class = muRmuFPolVar_helpers_Z - process_name = "Z" for coeffKey in helpers_class.keys(): logger.debug(f"Creating muR/muF histograms with polynomial variations for {coeffKey}") helperQ = helpers_class[coeffKey] df = df.Define(f"muRmuFPolVar_{coeffKey}_tensor", helperQ, theoryAgnostic_helpers_cols) - noiAsPoiWithPolHistName = Datagroups.histName("nominal", syst=f"muRmuFPolVar{process_name}_{coeffKey}") + noiAsPoiWithPolHistName = Datagroups.histName("nominal", syst=f"muRmuFPolVar_{coeffKey}") results.append(df.HistoBoost(noiAsPoiWithPolHistName, nominal_axes, [*nominal_cols, f"muRmuFPolVar_{coeffKey}_tensor"], tensor_axes=helperQ.tensor_axes, storage=hist.storage.Double())) if not args.noRecoil and args.recoilUnc: diff --git a/utilities/common.py b/utilities/common.py index e0abbd36e..6f0d4fbb7 100644 --- a/utilities/common.py +++ b/utilities/common.py @@ -360,7 +360,7 @@ def __call__(self, parser, namespace, values, option_string=None): parser.add_argument("--noScaleToData", action="store_true", help="Do not scale the MC histograms with xsec*lumi/sum(gen weights) in the postprocessing step") parser.add_argument("--aggregateGroups", type=str, nargs="*", default=["Diboson", "Top"], help="Sum up histograms from members of given groups in the postprocessing step") parser.add_argument("--muRmuFPolVarFilePath", type=str, default=f"{data_dir}/MiNNLOmuRmuFPolVar/", help="Path where input files are stored") - parser.add_argument("--muRmuFPolVarFileTag", type=str, default="x0p50_y4p00_ConstrPol5ExtYdep_Trad", choices=["x0p50_y4p00_ConstrPol5ExtYdep_Trad","x0p50_y4p00_ConstrPol5Ext_Trad"],help="Tag for input files") + parser.add_argument("--muRmuFPolVarFileTag", type=str, default="x0p50_y4p00_Fix_Trad", choices=["x0p50_y4p00_Fix_Trad","x0p50_y4p00_FixYdep_Trad"],help="Tag for input files") parser.add_argument("--nToysMC", type=int, help="random toys for data and MC", default=-1) parser.add_argument("--varianceScalingForToys", type=int, default=1, help="Scaling of variance for toys (effective mc statistics corresponds to 1./scaling)") parser.add_argument("--randomSeedForToys", type=int, default=0, help="random seed for toys") diff --git a/wremnants-data b/wremnants-data index 7eeb03a0a..ba3eaea39 160000 --- a/wremnants-data +++ b/wremnants-data @@ -1 +1 @@ -Subproject commit 7eeb03a0ada1c31abc56d791bab385ae765041b7 +Subproject commit ba3eaea39018cc2a0e79fc17be5aeae5abbd97b3 diff --git a/wremnants/combine_theoryAgnostic_helper.py b/wremnants/combine_theoryAgnostic_helper.py index c19a97249..c01a90335 100644 --- a/wremnants/combine_theoryAgnostic_helper.py +++ b/wremnants/combine_theoryAgnostic_helper.py @@ -56,35 +56,51 @@ def add_theoryAgnostic_polVar_uncertainty(self): #splitGroup={f"{groupName}_{coeffKey}" : f"{groupName}_{coeffKey}"} ) - def add_muRmuF_polVar_uncertainty(self): + def add_muRmuF_polVar_uncertainty(self, isTheoryAgnostic): coeffs = [f"A{i}" for i in range(8)] signalGroupName = "muRmuFPolVarW" if self.label == "W" else "muRmuFPolVarZ" nonSignalGroupName = "muRmuFPolVarZ" if self.label == "W" else "muRmuFPolVarW" - for coeffKey in coeffs: - self.card_tool.addSystematic(f"{signalGroupName}_{coeffKey}", - group=signalGroupName, - mirror=False, - passToFakes=self.passSystToFakes, - 
processes=["signal_samples_inctau_noOutAcc" if self.separateOutOfAccSignal else "signal_samples_inctau"], - baseName=f"{signalGroupName}_{coeffKey}_", - noConstraint=False, - systAxes=["nPolVarSyst", "downUpVar"], - labelsByAxis=["v", "downUpVar"], - #splitGroup={f"{groupName}_{coeffKey}" : f"{groupName}_{coeffKey}"} - ) - - for coeffKey in coeffs: - self.card_tool.addSystematic(f"{nonSignalGroupName}_{coeffKey}", - group=nonSignalGroupName, - mirror=False, - passToFakes=self.passSystToFakes, - processes=["nonsignal_samples_inctau_noOutAcc" if self.separateOutOfAccSignal else "nonsignal_samples_inctau"], - baseName=f"{nonSignalGroupName}_{coeffKey}_", - noConstraint=False, - systAxes=["nPolVarSyst", "downUpVar"], - labelsByAxis=["v", "downUpVar"], - #splitGroup={f"{groupName}_{coeffKey}" : f"{groupName}_{coeffKey}"} - ) + signalProcesses = "signal_samples_inctau_OOA" if isTheoryAgnostic else "signal_samples_inctau" + nonsignalProcesses = "nonsignal_samples_inctau_OOA" if isTheoryAgnostic else "nonsignal_samples_inctau" + ProcessNames = [signalGroupName,nonSignalGroupName] + Processes = [signalProcesses,nonsignalProcesses] + if self.args.muRmuFFullyCorr: + signal_expanded_samples = self.card_tool.getProcNames([signalProcesses]) + nonsignal_expanded_samples = self.card_tool.getProcNames([nonsignalProcesses]) + func = syst_tools.muRmuFFullyCorrelatedVariations + signal_preop_map = {proc : func for proc in signal_expanded_samples} + nonsignal_preop_map = {proc : func for proc in nonsignal_expanded_samples} + preop_args = {} + PreOpMaps = [signal_preop_map,nonsignal_preop_map] + for Index in range(0,len(ProcessNames)): + for coeffKey in coeffs: + self.card_tool.addSystematic(f"muRmuFPolVar_{coeffKey}", + group=ProcessNames[Index], + mirror=False, + passToFakes=self.passSystToFakes, + processes=[Processes[Index]], + baseName=f"{ProcessNames[Index]}_{coeffKey}_", + noConstraint=False, + systAxes=["nPolVarSyst", "downUpVar"], + labelsByAxis=["v", "downUpVar"], + rename=f"{ProcessNames[Index]}_{coeffKey}", + #splitGroup={f"{groupName}_{coeffKey}" : f"{groupName}_{coeffKey}"} + ) + if self.args.muRmuFFullyCorr: + self.card_tool.addSystematic(f"muRmuFPolVar_{coeffKey}", + preOpMap=PreOpMaps[Index], + preOpArgs=preop_args, + group=ProcessNames[Index], + mirror=False, + passToFakes=self.passSystToFakes, + processes=[Processes[Index]], + baseName=f"{ProcessNames[Index]}_{coeffKey}_FullyCorr_", + noConstraint=False, + systAxes=["nPolVarSyst", "downUpVar"], + labelsByAxis=["v", "downUpVar"], + rename=f"{ProcessNames[Index]}_{coeffKey}_FullyCorr", + #splitGroup={f"{groupName}_{coeffKey}" : f"{groupName}_{coeffKey}"} + ) def add_theoryAgnostic_normVar_uncertainty(self, flow=True): @@ -195,9 +211,11 @@ def add_theoryAgnostic_normVar_uncertainty(self, flow=True): ) def add_theoryAgnostic_uncertainty(self): - if self.args.analysisMode == "theoryAgnosticPolVar": - self.add_theoryAgnostic_polVar_uncertainty() - elif self.args.muRmuFPolVar == True: - self.add_muRmuF_polVar_uncertainty() - else: - self.add_theoryAgnostic_normVar_uncertainty() + isTheoryAgnostic = self.args.analysisMode in ["theoryAgnosticNormVar", "theoryAgnosticPolVar"] + if isTheoryAgnostic: + if self.args.analysisMode == "theoryAgnosticPolVar": + self.add_theoryAgnostic_polVar_uncertainty() + else: + self.add_theoryAgnostic_normVar_uncertainty() + if self.args.muRmuFPolVar == True: + self.add_muRmuF_polVar_uncertainty(isTheoryAgnostic) diff --git a/wremnants/combine_theory_helper.py b/wremnants/combine_theory_helper.py index 
117414f8a..0a26aa4e6 100644 --- a/wremnants/combine_theory_helper.py +++ b/wremnants/combine_theory_helper.py @@ -155,7 +155,8 @@ def add_resum_unc(self, scale=1): self.add_minnlo_scale_uncertainty(sample_group, extra_name = "fine", rebin_pt=fine_pt_binning, helicities_to_exclude=helicities_to_exclude) - self.add_minnlo_scale_uncertainty(sample_group, extra_name = "inclusive", rebin_pt=[common.ptV_binning[0], common.ptV_binning[-1]], helicities_to_exclude=helicities_to_exclude, scale = scale_inclusive) + if not (self.args.muRmuFPolVar and self.args.muRmuFFullyCorr): + self.add_minnlo_scale_uncertainty(sample_group, extra_name = "inclusive", rebin_pt=[common.ptV_binning[0], common.ptV_binning[-1]], helicities_to_exclude=helicities_to_exclude, scale = scale_inclusive) # additional uncertainty for effect of shower and intrinsic kt on angular coeffs self.add_helicity_shower_kt_uncertainty() diff --git a/wremnants/syst_tools.py b/wremnants/syst_tools.py index a9328400b..eb552b481 100644 --- a/wremnants/syst_tools.py +++ b/wremnants/syst_tools.py @@ -361,6 +361,24 @@ def hist_to_variations(hist_in, gen_axes = [], sum_axes = [], rebin_axes=[], reb return variation_hist +def muRmuFFullyCorrelatedVariations(hist_in): + + if hist_in.name is None: + out_name = "hist_variations" + else: + out_name = hist_in.name + "_variations" + + variation_hist = hist.Hist(*hist_in.axes, name = out_name, storage = hist.storage.Double()) + variation_hist = variation_hist[...,0:1,:] + + for i in range(len(hist_in.axes[len(hist_in.axes)-2])): + variation_hist.values(flow=True)[...,0,0] = variation_hist.values(flow=True)[...,0,0] + hist_in.values(flow=True)[...,i,0] - (hist_in.values(flow=True)[...,0,0]+hist_in.values(flow=True)[...,0,1])/2 #this average which is subtracted is the nominal histogram (by construction, it's directly related to how the weights are defined) + variation_hist.values(flow=True)[...,0,1] = variation_hist.values(flow=True)[...,0,1] + hist_in.values(flow=True)[...,i,1] - (hist_in.values(flow=True)[...,0,0]+hist_in.values(flow=True)[...,0,1])/2 + variation_hist.values(flow=True)[...,0,0] = variation_hist.values(flow=True)[...,0,0] + (hist_in.values(flow=True)[...,0,0]+hist_in.values(flow=True)[...,0,1])/2 + variation_hist.values(flow=True)[...,0,1] = variation_hist.values(flow=True)[...,0,1] + (hist_in.values(flow=True)[...,0,0]+hist_in.values(flow=True)[...,0,1])/2 + + return variation_hist + def uncertainty_hist_from_envelope(h, proj_ax, entries): hdown = hh.syst_min_or_max_env_hist(h, proj_ax, "vars", entries, no_flow=["ptVgen"], do_min=True) From 674b9ff09b5e9ce3aa73f7c093b8cb8370a77c25 Mon Sep 17 00:00:00 2001 From: dbruschi <62343966+dbruschi@users.noreply.github.com> Date: Thu, 1 Aug 2024 21:56:03 +0200 Subject: [PATCH 2/2] Update wremnants-data --- wremnants-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wremnants-data b/wremnants-data index ba3eaea39..dfb2ca916 160000 --- a/wremnants-data +++ b/wremnants-data @@ -1 +1 @@ -Subproject commit ba3eaea39018cc2a0e79fc17be5aeae5abbd97b3 +Subproject commit dfb2ca9163679ae8d5c91270c2a74e34c4de941b
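A minimal NumPy sketch of the arithmetic implemented by the new muRmuFFullyCorrelatedVariations helper in wremnants/syst_tools.py: the shifts of all polynomial variations relative to the nominal (the average of the first down/up pair) are summed coherently into a single fully correlated down/up variation. The array shape and values below are invented for illustration; the real helper operates on boost-histogram objects with flow bins, with the last two axes being (nPolVarSyst, downUpVar).

import numpy as np

# Toy variation tensor: (bins, nPolVarSyst, downUpVar); index 0 of the last axis is "down", 1 is "up".
rng = np.random.default_rng(0)
var = rng.normal(loc=100.0, scale=1.0, size=(5, 4, 2))

# By construction the nominal is the average of the first down/up pair.
nominal = 0.5 * (var[..., 0, 0] + var[..., 0, 1])

# Fully correlated variation: add up the shifts of every polynomial variation coherently.
down = nominal + (var[..., :, 0] - nominal[..., None]).sum(axis=-1)
up   = nominal + (var[..., :, 1] - nominal[..., None]).sum(axis=-1)

print(down.shape, up.shape)  # one combined down/up variation per bin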
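For reference, a self-contained sketch of the selection performed by the new assertSampleWithOOA helper in scripts/combine/setupCombine.py, which keeps only the out-of-acceptance members of a process group. The function below mirrors its logic; the sample names in the example are hypothetical, formed from the PostVFP names used in the histmakers with an "OOA" suffix.

def assert_sample_with_ooa(name, starts_with=("W", "Z"), exclude_match=()):
    # Same logic as assertSampleWithOOA: prefix match, mandatory "OOA" suffix, exclusion list.
    return (any(name.startswith(s) for s in starts_with)
            and name.endswith("OOA")
            and all(excl not in name for excl in exclude_match))

names = ["WplusmunuPostVFP", "WplusmunuPostVFPOOA", "ZmumuPostVFPOOA", "WWPostVFPOOA"]
print([n for n in names if assert_sample_with_ooa(n, exclude_match=("WW",))])
# -> ['WplusmunuPostVFPOOA', 'ZmumuPostVFPOOA']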