Skip to content

Commit

Permalink
Fix bug in error propagation for fakerate in extended1D; swap regions…
Browse files Browse the repository at this point in the history
… for fakerate (and hybrid) smoothing to obtain better smoothing p-values; set fakerate smoothing order to 3 by default; add global parameter variations as additional uncertainties on fakes
  • Loading branch information
davidwalter2 committed Aug 15, 2024
1 parent 3034dfe commit 199f3ea
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 35 deletions.
45 changes: 23 additions & 22 deletions scripts/combine/setupCombine.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def make_parser(parser=None):
parser.add_argument("--fakeSmoothingMode", type=str, default="hybrid", choices=["binned", "fakerate", "hybrid", "full"], help="Smoothing mode for fake estimate.")
parser.add_argument("--forceGlobalScaleFakes", default=None, type=float, help="Scale the fakes by this factor (overriding any custom one implemented in datagroups.py in the fakeSelector).")
parser.add_argument("--fakeMCCorr", type=str, default=[None], nargs="*", choices=["none", "pt", "eta", "mt"], help="axes to apply nonclosure correction from QCD MC. Leave empty for inclusive correction, use'none' for no correction")
parser.add_argument("--fakeSmoothingOrder", type=int, default=3, help="Order of the polynomial for the smoothing of the fake rate or full prediction, depending on the smoothing mode")
parser.add_argument("--fakeSmoothingOrder", type=int, default=3, help="Order of the polynomial for the smoothing of the application region or full prediction, depending on the smoothing mode")
parser.add_argument("--simultaneousABCD", action="store_true", help="Produce datacard for simultaneous fit of ABCD regions")
parser.add_argument("--skipSumGroups", action="store_true", help="Don't add sum groups to the output to save time e.g. when computing impacts")
parser.add_argument("--allowNegativeExpectation", action="store_true", help="Allow processes to have negative contributions")
Expand Down Expand Up @@ -377,7 +377,7 @@ def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, genvar=None, x
pseudodataGroups.fakerate_axes=args.fakerateAxes
pseudodataGroups.set_histselectors(
pseudodataGroups.getNames(), inputBaseName, mode=args.fakeEstimation,
smoothing_mode=args.fakeSmoothingMode, smoothingOrderFakerate=args.fakeSmoothingOrder,
smoothing_mode=args.fakeSmoothingMode, smoothingOrderSpectrum=args.fakeSmoothingOrder,
mcCorr=args.fakeMCCorr,
integrate_x="mt" not in fitvar,
simultaneousABCD=simultaneousABCD, forceGlobalScaleFakes=args.forceGlobalScaleFakes,
Expand All @@ -394,7 +394,7 @@ def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, genvar=None, x
pseudodataGroups.copyGroup("QCD", "QCDTruth")
pseudodataGroups.set_histselectors(
pseudodataGroups.getNames(), inputBaseName, mode=args.fakeEstimation, fake_processes=["QCD",],
smoothing_mode=args.fakeSmoothingMode, smoothingOrderFakerate=args.fakeSmoothingOrder,
smoothing_mode=args.fakeSmoothingMode, smoothingOrderSpectrum=args.fakeSmoothingOrder,
mcCorr=args.fakeMCCorr,
simultaneousABCD=simultaneousABCD, forceGlobalScaleFakes=args.forceGlobalScaleFakes,
)
Expand Down Expand Up @@ -811,10 +811,10 @@ def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]):
if args.fakeSmoothingMode in ["hybrid", "full"] and args.fakeSmoothingOrder > 0:
# add systematic of explicit parameter variation
fakeSmoothingOrder = args.fakeSmoothingOrder
def fake_nonclosure(h, axesToDecorrNames, *args, **kwargs):
def fake_nonclosure(h, axesToDecorrNames, param_idx=1, variation_size=0.5, *args, **kwargs):
# apply variation by adding parameter value (assumes log space, e.g. in full smoothing)
fakeselector.spectrum_regressor.external_params = np.zeros(fakeSmoothingOrder + 1)
fakeselector.spectrum_regressor.external_params[1] = 0.5
fakeselector.spectrum_regressor.external_params[param_idx] = variation_size
hvar = fakeselector.get_hist(h, *args, **kwargs)
# reset external parameters
fakeselector.spectrum_regressor.external_params = None
Expand All @@ -837,23 +837,24 @@ def fake_nonclosure(h, axesToDecorrNames, *args, **kwargs):

return hvar

for axesToDecorrNames in [[], ]:#["eta"]]:
subgroup = f"{cardTool.getFakeName()}Param1"
cardTool.addSystematic(
name=inputBaseName,
group="Fake",
rename=subgroup+ (f"_{'_'.join(axesToDecorrNames)}" if len(axesToDecorrNames) else ""),
splitGroup = {subgroup: f".*", "experiment": ".*"},
systNamePrepend=subgroup,
processes=cardTool.getFakeName(),
noConstraint=False,
mirror=True,
scale=1,
applySelection=False, # don't apply selection, external parameters need to be added
action=fake_nonclosure,
actionArgs=dict(axesToDecorrNames=axesToDecorrNames),
systAxes=["var"] if len(axesToDecorrNames)==0 else [f"{n}_decorr" for n in axesToDecorrNames],
)
for axesToDecorrNames in [[], ]:
for idx, mag in [(1,0.2),(2,0.2),(3,0.2)]:
subgroup = f"{cardTool.getFakeName()}Param{idx}"
cardTool.addSystematic(
name=inputBaseName,
group="Fake",
rename=subgroup+ (f"_{'_'.join(axesToDecorrNames)}" if len(axesToDecorrNames) else ""),
splitGroup = {subgroup: f".*", "experiment": ".*"},
systNamePrepend=subgroup,
processes=cardTool.getFakeName(),
noConstraint=False,
mirror=True,
scale=1,
applySelection=False, # don't apply selection, external parameters need to be added
action=fake_nonclosure,
actionArgs=dict(axesToDecorrNames=axesToDecorrNames, param_idx=idx, variation_size=mag),
systAxes=["var"] if len(axesToDecorrNames)==0 else [f"{n}_decorr" for n in axesToDecorrNames],
)

if not args.noEfficiencyUnc:

Expand Down
2 changes: 1 addition & 1 deletion scripts/plotting/controlPlotsHDF5.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def make_plot(hists_proc, hist_data, hists_syst_up, hists_syst_dn, axes_names,
# offset_text.set_position((-0.08,1.02))

if args.cmsDecor:
lumi = float(f"{channel_info['lumi']:.3g}") if not density else None
lumi = float(f"{channel_info['lumi']:.3g}") if not density and args.dataName=="Data" else None

hep.cms.label(ax=ax1, lumi=lumi, fontsize=legtext_size*scale,
label= args.cmsDecor, data=hist_data is not None)
Expand Down
2 changes: 1 addition & 1 deletion scripts/plotting/makeDataMCStackPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
parser.add_argument("--fakeSmoothingMode", type=str, default="hybrid", choices=["binned", "fakerate", "hybrid", "full"], help="Smoothing mode for fake estimate.")
parser.add_argument("--fakeMCCorr", type=str, default=[None], nargs="*", choices=["none", "pt", "eta", "mt"], help="axes to apply nonclosure correction from QCD MC. Leave empty for inclusive correction, use'none' for no correction")
parser.add_argument("--forceGlobalScaleFakes", default=None, type=float, help="Scale the fakes by this factor (overriding any custom one implemented in datagroups.py in the fakeSelector).")
parser.add_argument("--fakeSmoothingOrder", type=int, default=3, help="Order of the polynomial for the smoothing of the fake rate or full prediction, depending on the smoothing mode")
parser.add_argument("--fakeSmoothingOrder", type=int, default=3, help="Order of the polynomial for the smoothing of the application region or full prediction, depending on the smoothing mode")
parser.add_argument("--fakerateAxes", nargs="+", help="Axes for the fakerate binning", default=["eta","pt","charge"])
parser.add_argument("--fineGroups", action='store_true', help="Plot each group as a separate process, otherwise combine groups based on predefined dictionary")

Expand Down
2 changes: 1 addition & 1 deletion scripts/plotting/postfitPlots.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def make_plot(h_data, h_inclusive, h_stack, axes, colors=None, labels=None, suff
plot_tools.redo_axis_ticks(ax1, "x")
plot_tools.redo_axis_ticks(ax2, "x")

hep.cms.label(ax=ax1, lumi=float(f"{lumi:.3g}") if lumi is not None else None,
hep.cms.label(ax=ax1, lumi=float(f"{lumi:.3g}") if lumi is not None and args.dataName=="Data" else None,
fontsize=fontsize,
label=args.cmsDecor, data=data)

Expand Down
13 changes: 9 additions & 4 deletions wremnants/CardTool.py
Original file line number Diff line number Diff line change
Expand Up @@ -847,12 +847,17 @@ def loadPseudodata(self, forceNonzero=False):

fakeselector = self.datagroups.getDatagroups()[self.getFakeName()].histselector

_0, _1, params_d, cov_d, _chi2_d, _ndf_d = fakeselector.calculate_fullABCD_smoothed(hist_fake, auxiliary_info=True, signal_region=True)
_0, _1 = fakeselector.calculate_fullABCD_smoothed(hist_fake, signal_region=True)
params_d = fakeselector.spectrum_regressor.params
cov_d = fakeselector.spectrum_regressor.cov

hist_fake = hh.scaleHist(hist_fake, fakeselector.global_scalefactor)
_0, _1, params, cov, _chi2, _ndf = fakeselector.calculate_fullABCD_smoothed(hist_fake, auxiliary_info=True)
_0, _1 = fakeselector.calculate_fullABCD_smoothed(hist_fake)
params = fakeselector.spectrum_regressor.params
cov = fakeselector.spectrum_regressor.cov

# add the nonclosure by adding the difference of the parameters
fakeselector.external_params = params_d - params
fakeselector.spectrum_regressor.external_params = params_d - params
# load the pseudodata including the nonclosure
self.datagroups.loadHistsForDatagroups(
baseName=self.nominalName, syst=self.nominalName, label=pseudoData,
Expand All @@ -866,7 +871,7 @@ def loadPseudodata(self, forceNonzero=False):
hdatas.append(hdata)

# remove the parameter offset again
fakeselector.external_params = None
fakeselector.spectrum_regressor.external_params = None
# add the covariance matrix from the nonclosure to the model
fakeselector.external_cov = cov + cov_d

Expand Down
2 changes: 1 addition & 1 deletion wremnants/datasets/datagroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def set_histselectors(
fake_processes=None,
mode="extended1D",
smoothing_mode="full",
smoothingOrderFakerate=2,
smoothingOrderFakerate=3,
smoothingOrderSpectrum=3,
integrate_shapecorrection_x=True, # integrate the abcd x-axis or not, only relevant for extended2D
simultaneousABCD=False,
Expand Down
6 changes: 3 additions & 3 deletions wremnants/histselections.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ class FakeSelectorSimpleABCD(HistselectorABCD):
# simple ABCD method
def __init__(self, h, *args,
smoothing_mode="full",
smoothing_order_fakerate=2,
smoothing_order_fakerate=3,
smoothing_order_spectrum=3,
throw_toys=None, #"normal", # None, 'normal' or 'poisson'
global_scalefactor=1, # apply global correction factor on prediction
Expand Down Expand Up @@ -301,7 +301,7 @@ def __init__(self, h, *args,
self.hCorr = None

# swap the A and C regions for better numerical behaviour (only implemented for fakerate and hybrid smoothing)
self.swap_regions = False
self.swap_regions = True

def set_correction(self, hQCD, axes_names=False, mirror_axes=["eta"], flow=True):
# hQCD is QCD MC histogram before selection (should contain variances)
Expand Down Expand Up @@ -840,7 +840,7 @@ def compute_fakeratefactor(self, h, smoothing=False, syst_variations=False, flow
axvar = hax.variances(flow=flow)
bvar = hb.variances(flow=flow)
bxvar = hbx.variances(flow=flow)
y_var = b**4/(bx**2*a**4)*axvar + ax**2*b**2/(bx**2*a**4)*4*bvar + 4*avar/a**2 + ax**2*b**4/(bx**4*a**4)*bxvar
y_var = b**4/(bx**2*a**4)*axvar + ax**2*b**2/(bx**2*a**4)*4*bvar + y**2*4*avar/a**2 + y**2*bxvar/bx**2
y_var[y_den <= 1] = 1e10

if self.hCorr:
Expand Down
4 changes: 2 additions & 2 deletions wremnants/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ def compute_chi2(y, y_pred, w=None, nparams=1):
ndf_total = y.size - chi2.size*nparams

logger.info(f"Total chi2/ndf = {chi2_total}/{ndf_total} = {chi2_total/ndf_total} (p = {stats.chi2.sf(chi2_total, ndf_total)})")
logger.info(f"Min chi2 = {chi2.min()} (p = {stats.chi2.sf(chi2.min(), ndf)})")
logger.info(f"Max chi2 = {chi2.max()} (p = {stats.chi2.sf(chi2.max(), ndf)})")
logger.info(f"Min chi2/ndf = {chi2.min()}/{ndf} (p = {stats.chi2.sf(chi2.min(), ndf)})")
logger.info(f"Max chi2/ndf = {chi2.max()}/{ndf} (p = {stats.chi2.sf(chi2.max(), ndf)})")
logger.info(f"Mean chi2 = {chi2.mean()}")
logger.info(f"Std chi2 = {chi2.std()}")
return chi2, ndf
Expand Down

0 comments on commit 199f3ea

Please sign in to comment.