Fix bug in error propagation for fakerate in extended1D; swap regions…

… for fakerate (and hybrid) smoothing to obtain better smoothing p-values; set fakerate smoothing order to 3 by default; add global parameter variations as additional uncertainties on fakes
WMass · Aug 15, 2024 · 199f3ea · 199f3ea
1 parent 3034dfe
commit 199f3ea
Show file tree

Hide file tree

Showing 8 changed files with 41 additions and 35 deletions.
diff --git a/scripts/combine/setupCombine.py b/scripts/combine/setupCombine.py
@@ -85,7 +85,7 @@ def make_parser(parser=None):
     parser.add_argument("--fakeSmoothingMode", type=str, default="hybrid", choices=["binned", "fakerate", "hybrid", "full"], help="Smoothing mode for fake estimate.")
     parser.add_argument("--forceGlobalScaleFakes", default=None, type=float, help="Scale the fakes  by this factor (overriding any custom one implemented in datagroups.py in the fakeSelector).")
     parser.add_argument("--fakeMCCorr", type=str, default=[None], nargs="*", choices=["none", "pt", "eta", "mt"], help="axes to apply nonclosure correction from QCD MC. Leave empty for inclusive correction, use'none' for no correction")
-    parser.add_argument("--fakeSmoothingOrder", type=int, default=3, help="Order of the polynomial for the smoothing of the fake rate or full prediction, depending on the smoothing mode")
+    parser.add_argument("--fakeSmoothingOrder", type=int, default=3, help="Order of the polynomial for the smoothing of the application region or full prediction, depending on the smoothing mode")
     parser.add_argument("--simultaneousABCD", action="store_true", help="Produce datacard for simultaneous fit of ABCD regions")
     parser.add_argument("--skipSumGroups", action="store_true", help="Don't add sum groups to the output to save time e.g. when computing impacts")
     parser.add_argument("--allowNegativeExpectation", action="store_true", help="Allow processes to have negative contributions")
@@ -377,7 +377,7 @@ def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, genvar=None, x
                 pseudodataGroups.fakerate_axes=args.fakerateAxes
                 pseudodataGroups.set_histselectors(
                     pseudodataGroups.getNames(), inputBaseName, mode=args.fakeEstimation,
-                    smoothing_mode=args.fakeSmoothingMode, smoothingOrderFakerate=args.fakeSmoothingOrder,
+                    smoothing_mode=args.fakeSmoothingMode, smoothingOrderSpectrum=args.fakeSmoothingOrder,
                     mcCorr=args.fakeMCCorr,
                     integrate_x="mt" not in fitvar,
                     simultaneousABCD=simultaneousABCD, forceGlobalScaleFakes=args.forceGlobalScaleFakes,
@@ -394,7 +394,7 @@ def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, genvar=None, x
             pseudodataGroups.copyGroup("QCD", "QCDTruth")
             pseudodataGroups.set_histselectors(
                 pseudodataGroups.getNames(), inputBaseName, mode=args.fakeEstimation, fake_processes=["QCD",],
-                smoothing_mode=args.fakeSmoothingMode, smoothingOrderFakerate=args.fakeSmoothingOrder,
+                smoothing_mode=args.fakeSmoothingMode, smoothingOrderSpectrum=args.fakeSmoothingOrder,
                 mcCorr=args.fakeMCCorr,
                 simultaneousABCD=simultaneousABCD, forceGlobalScaleFakes=args.forceGlobalScaleFakes,
                 )
@@ -811,10 +811,10 @@ def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]):
         if args.fakeSmoothingMode in ["hybrid", "full"] and args.fakeSmoothingOrder > 0:
             # add systematic of explicit parameter variation
             fakeSmoothingOrder = args.fakeSmoothingOrder
-            def fake_nonclosure(h, axesToDecorrNames, *args, **kwargs):
+            def fake_nonclosure(h, axesToDecorrNames, param_idx=1, variation_size=0.5, *args, **kwargs):
                 # apply variation by adding parameter value (assumes log space, e.g. in full smoothing)
                 fakeselector.spectrum_regressor.external_params = np.zeros(fakeSmoothingOrder + 1)
-                fakeselector.spectrum_regressor.external_params[1] = 0.5
+                fakeselector.spectrum_regressor.external_params[param_idx] = variation_size
                 hvar = fakeselector.get_hist(h, *args, **kwargs)
                 # reset external parameters
                 fakeselector.spectrum_regressor.external_params = None
@@ -837,23 +837,24 @@ def fake_nonclosure(h, axesToDecorrNames, *args, **kwargs):
 
                 return hvar
 
-            for axesToDecorrNames in [[], ]:#["eta"]]:
-                subgroup = f"{cardTool.getFakeName()}Param1"
-                cardTool.addSystematic(
-                    name=inputBaseName, 
-                    group="Fake",
-                    rename=subgroup+ (f"_{'_'.join(axesToDecorrNames)}" if len(axesToDecorrNames) else ""),
-                    splitGroup = {subgroup: f".*", "experiment": ".*"},
-                    systNamePrepend=subgroup,
-                    processes=cardTool.getFakeName(),
-                    noConstraint=False,
-                    mirror=True,
-                    scale=1,
-                    applySelection=False, # don't apply selection, external parameters need to be added
-                    action=fake_nonclosure,
-                    actionArgs=dict(axesToDecorrNames=axesToDecorrNames),
-                    systAxes=["var"] if len(axesToDecorrNames)==0 else [f"{n}_decorr" for n in axesToDecorrNames],
-                )
+            for axesToDecorrNames in [[], ]:
+                for idx, mag in [(1,0.2),(2,0.2),(3,0.2)]:
+                    subgroup = f"{cardTool.getFakeName()}Param{idx}"
+                    cardTool.addSystematic(
+                        name=inputBaseName, 
+                        group="Fake",
+                        rename=subgroup+ (f"_{'_'.join(axesToDecorrNames)}" if len(axesToDecorrNames) else ""),
+                        splitGroup = {subgroup: f".*", "experiment": ".*"},
+                        systNamePrepend=subgroup,
+                        processes=cardTool.getFakeName(),
+                        noConstraint=False,
+                        mirror=True,
+                        scale=1,
+                        applySelection=False, # don't apply selection, external parameters need to be added
+                        action=fake_nonclosure,
+                        actionArgs=dict(axesToDecorrNames=axesToDecorrNames, param_idx=idx, variation_size=mag),
+                        systAxes=["var"] if len(axesToDecorrNames)==0 else [f"{n}_decorr" for n in axesToDecorrNames],
+                    )
 
     if not args.noEfficiencyUnc:
 

diff --git a/scripts/plotting/controlPlotsHDF5.py b/scripts/plotting/controlPlotsHDF5.py
@@ -286,7 +286,7 @@ def make_plot(hists_proc, hist_data, hists_syst_up, hists_syst_dn, axes_names,
         # offset_text.set_position((-0.08,1.02))
 
         if args.cmsDecor:
-            lumi = float(f"{channel_info['lumi']:.3g}") if not density else None
+            lumi = float(f"{channel_info['lumi']:.3g}") if not density and args.dataName=="Data" else None
 
             hep.cms.label(ax=ax1, lumi=lumi, fontsize=legtext_size*scale, 
                 label= args.cmsDecor, data=hist_data is not None)

diff --git a/scripts/plotting/makeDataMCStackPlot.py b/scripts/plotting/makeDataMCStackPlot.py
@@ -46,7 +46,7 @@
 parser.add_argument("--fakeSmoothingMode", type=str, default="hybrid", choices=["binned", "fakerate", "hybrid", "full"], help="Smoothing mode for fake estimate.")
 parser.add_argument("--fakeMCCorr", type=str, default=[None], nargs="*", choices=["none", "pt", "eta", "mt"], help="axes to apply nonclosure correction from QCD MC. Leave empty for inclusive correction, use'none' for no correction")
 parser.add_argument("--forceGlobalScaleFakes", default=None, type=float, help="Scale the fakes  by this factor (overriding any custom one implemented in datagroups.py in the fakeSelector).")
-parser.add_argument("--fakeSmoothingOrder", type=int, default=3, help="Order of the polynomial for the smoothing of the fake rate or full prediction, depending on the smoothing mode")
+parser.add_argument("--fakeSmoothingOrder", type=int, default=3, help="Order of the polynomial for the smoothing of the application region or full prediction, depending on the smoothing mode")
 parser.add_argument("--fakerateAxes", nargs="+", help="Axes for the fakerate binning", default=["eta","pt","charge"])
 parser.add_argument("--fineGroups", action='store_true', help="Plot each group as a separate process, otherwise combine groups based on predefined dictionary")
 

diff --git a/scripts/plotting/postfitPlots.py b/scripts/plotting/postfitPlots.py
@@ -210,7 +210,7 @@ def make_plot(h_data, h_inclusive, h_stack, axes, colors=None, labels=None, suff
     plot_tools.redo_axis_ticks(ax1, "x")
     plot_tools.redo_axis_ticks(ax2, "x")
 
-    hep.cms.label(ax=ax1, lumi=float(f"{lumi:.3g}") if lumi is not None else None, 
+    hep.cms.label(ax=ax1, lumi=float(f"{lumi:.3g}") if lumi is not None and args.dataName=="Data" else None, 
         fontsize=fontsize, 
         label=args.cmsDecor, data=data)
 

diff --git a/wremnants/CardTool.py b/wremnants/CardTool.py
@@ -847,12 +847,17 @@ def loadPseudodata(self, forceNonzero=False):
 
                 fakeselector = self.datagroups.getDatagroups()[self.getFakeName()].histselector
 
-                _0, _1, params_d, cov_d, _chi2_d, _ndf_d = fakeselector.calculate_fullABCD_smoothed(hist_fake, auxiliary_info=True, signal_region=True)
+                _0, _1 = fakeselector.calculate_fullABCD_smoothed(hist_fake, signal_region=True)
+                params_d = fakeselector.spectrum_regressor.params
+                cov_d = fakeselector.spectrum_regressor.cov
+
                 hist_fake = hh.scaleHist(hist_fake, fakeselector.global_scalefactor)
-                _0, _1, params, cov, _chi2, _ndf = fakeselector.calculate_fullABCD_smoothed(hist_fake, auxiliary_info=True)
+                _0, _1 = fakeselector.calculate_fullABCD_smoothed(hist_fake)
+                params = fakeselector.spectrum_regressor.params
+                cov = fakeselector.spectrum_regressor.cov
 
                 # add the nonclosure by adding the difference of the parameters
-                fakeselector.external_params = params_d - params
+                fakeselector.spectrum_regressor.external_params = params_d - params
                 # load the pseudodata including the nonclosure
                 self.datagroups.loadHistsForDatagroups(
                     baseName=self.nominalName, syst=self.nominalName, label=pseudoData,
@@ -866,7 +871,7 @@ def loadPseudodata(self, forceNonzero=False):
                 hdatas.append(hdata)
 
                 # remove the parameter offset again
-                fakeselector.external_params = None
+                fakeselector.spectrum_regressor.external_params = None
                 # add the covariance matrix from the nonclosure to the model
                 fakeselector.external_cov = cov + cov_d
 

diff --git a/wremnants/datasets/datagroups.py b/wremnants/datasets/datagroups.py
@@ -202,7 +202,7 @@ def set_histselectors(
         fake_processes=None,
         mode="extended1D",
         smoothing_mode="full",
-        smoothingOrderFakerate=2,
+        smoothingOrderFakerate=3,
         smoothingOrderSpectrum=3,
         integrate_shapecorrection_x=True, # integrate the abcd x-axis or not, only relevant for extended2D
         simultaneousABCD=False,

diff --git a/wremnants/histselections.py b/wremnants/histselections.py
@@ -247,7 +247,7 @@ class FakeSelectorSimpleABCD(HistselectorABCD):
     # simple ABCD method
     def __init__(self, h, *args, 
         smoothing_mode="full",
-        smoothing_order_fakerate=2,
+        smoothing_order_fakerate=3,
         smoothing_order_spectrum=3,
         throw_toys=None, #"normal", # None, 'normal' or 'poisson'
         global_scalefactor=1, # apply global correction factor on prediction
@@ -301,7 +301,7 @@ def __init__(self, h, *args,
         self.hCorr = None
 
         # swap the A and C regions for better numerical behaviour (only implemented for fakerate and hybrid smoothing)
-        self.swap_regions = False
+        self.swap_regions = True
 
     def set_correction(self, hQCD, axes_names=False, mirror_axes=["eta"], flow=True):
         # hQCD is QCD MC histogram before selection (should contain variances)
@@ -840,7 +840,7 @@ def compute_fakeratefactor(self, h, smoothing=False, syst_variations=False, flow
             axvar = hax.variances(flow=flow)
             bvar = hb.variances(flow=flow)
             bxvar = hbx.variances(flow=flow)
-            y_var = b**4/(bx**2*a**4)*axvar + ax**2*b**2/(bx**2*a**4)*4*bvar + 4*avar/a**2 + ax**2*b**4/(bx**4*a**4)*bxvar
+            y_var = b**4/(bx**2*a**4)*axvar + ax**2*b**2/(bx**2*a**4)*4*bvar + y**2*4*avar/a**2 + y**2*bxvar/bx**2
             y_var[y_den <= 1] = 1e10
 
         if self.hCorr:

diff --git a/wremnants/regression.py b/wremnants/regression.py
@@ -21,8 +21,8 @@ def compute_chi2(y, y_pred, w=None, nparams=1):
     ndf_total = y.size - chi2.size*nparams
 
     logger.info(f"Total chi2/ndf = {chi2_total}/{ndf_total} = {chi2_total/ndf_total} (p = {stats.chi2.sf(chi2_total, ndf_total)})")
-    logger.info(f"Min chi2 = {chi2.min()} (p = {stats.chi2.sf(chi2.min(), ndf)})")
-    logger.info(f"Max chi2 = {chi2.max()} (p = {stats.chi2.sf(chi2.max(), ndf)})")
+    logger.info(f"Min chi2/ndf = {chi2.min()}/{ndf} (p = {stats.chi2.sf(chi2.min(), ndf)})")
+    logger.info(f"Max chi2/ndf = {chi2.max()}/{ndf} (p = {stats.chi2.sf(chi2.max(), ndf)})")
     logger.info(f"Mean chi2 = {chi2.mean()}")
     logger.info(f"Std chi2 = {chi2.std()}")
     return chi2, ndf