Merge pull request #521 from davidwalter2/240724_fakes
Update QCD background: smoothing, uncertainties, and bias tests
bendavid authored Aug 16, 2024
2 parents 9f0825a + 8856b02 commit 42d7fd3
Showing 10 changed files with 812 additions and 748 deletions.
110 changes: 63 additions & 47 deletions scripts/combine/setupCombine.py
@@ -82,10 +82,10 @@ def make_parser(parser=None):
parser.add_argument("--noMCStat", action='store_true', help="Do not include MC stat uncertainty in covariance for theory fit (only when using --fitresult)")
parser.add_argument("--fakerateAxes", nargs="+", help="Axes for the fakerate binning", default=["eta","pt","charge"])
parser.add_argument("--fakeEstimation", type=str, help="Set the mode for the fake estimation", default="extended1D", choices=["closure", "simple", "extrapolate", "extended1D", "extended2D"])
parser.add_argument("--fakeSmoothingMode", type=str, default="full", choices=["binned", "fakerate", "full"], help="Smoothing mode for fake estimate.")
parser.add_argument("--fakeSmoothingMode", type=str, default="hybrid", choices=["binned", "fakerate", "hybrid", "full"], help="Smoothing mode for fake estimate.")
parser.add_argument("--forceGlobalScaleFakes", default=None, type=float, help="Scale the fakes by this factor (overriding any custom one implemented in datagroups.py in the fakeSelector).")
parser.add_argument("--fakeMCCorr", type=str, default=[None], nargs="*", choices=["none", "pt", "eta", "mt"], help="axes to apply nonclosure correction from QCD MC. Leave empty for inclusive correction, use'none' for no correction")
parser.add_argument("--fakeSmoothingOrder", type=int, default=3, help="Order of the polynomial for the smoothing of the fake rate or full prediction, depending on the smoothing mode")
parser.add_argument("--fakeSmoothingOrder", type=int, default=3, help="Order of the polynomial for the smoothing of the application region or full prediction, depending on the smoothing mode")
parser.add_argument("--simultaneousABCD", action="store_true", help="Produce datacard for simultaneous fit of ABCD regions")
parser.add_argument("--skipSumGroups", action="store_true", help="Don't add sum groups to the output to save time e.g. when computing impacts")
parser.add_argument("--allowNegativeExpectation", action="store_true", help="Allow processes to have negative contributions")
@@ -130,15 +130,15 @@ def make_parser(parser=None):
parser.add_argument("--pseudoDataIdxs", type=str, nargs="+", default=[None], help="Variation indices to use as pseudodata for each of the histograms")
parser.add_argument("--pseudoDataFile", type=str, help="Input file for pseudodata (if it should be read from a different file)", default=None)
parser.add_argument("--pseudoDataProcsRegexp", type=str, default=".*", help="Regular expression for processes taken from pseudodata file (all other processes are automatically got from the nominal file). Data is excluded automatically as usual")
parser.add_argument("--pseudoDataFakes", type=str, nargs="+", choices=["truthMC", "closure", "simple", "extrapolate", "extended1D", "extended2D", "dataClosure", "mcClosure", "simple-binned", "extended1D-fakerate"],
parser.add_argument("--pseudoDataFakes", type=str, nargs="+", choices=["truthMC", "closure", "simple", "extrapolate", "extended1D", "extended2D", "dataClosure", "mcClosure", "simple-binned", "extended1D-binned", "extended1D-fakerate"],
help="Pseudodata for fakes are using QCD MC (closure), or different estimation methods (simple, extended1D, extended2D)")
parser.add_argument("--addTauToSignal", action='store_true', help="Events from the same process but from tau final states are added to the signal")
parser.add_argument("--noPDFandQCDtheorySystOnSignal", action='store_true', help="Removes PDF and theory uncertainties on signal processes")
parser.add_argument("--recoCharge", type=str, default=["plus", "minus"], nargs="+", choices=["plus", "minus"], help="Specify reco charge to use, default uses both. This is a workaround for unfolding/theory-agnostic fit when running a single reco charge, as gen bins with opposite gen charge have to be filtered out")
parser.add_argument("--massConstraintMode", choices=["automatic", "constrained", "unconstrained"], default="automatic", help="Whether mass is constrained within PDG value and uncertainty or unconstrained in the fit")
parser.add_argument("--decorMassWidth", action='store_true', help="remove width variations from mass variations")
parser.add_argument("--muRmuFPolVar", action="store_true", help="Use polynomial variations (like in theoryAgnosticPolVar) instead of binned variations for muR and muF (of course in setupCombine these are still constrained nuisances)")
parser.add_argument("--binByBinStatScaleForMW", type=float, default=1.125, help="scaling of bin by bin statistical uncertainty for W mass analysis")
parser.add_argument("--binByBinStatScaleForMW", type=float, default=1.10, help="scaling of bin by bin statistical uncertainty for W mass analysis")
parser.add_argument("--exponentialTransform", action='store_true', help="apply exponential transformation to yields (useful for gen-level fits to helicity cross sections for example)")
parser = make_subparsers(parser)
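
A minimal standalone sketch (a toy parser, not the repository's full one) of how the updated options behave, with "hybrid" now both a new choice and the new default:

import argparse

toy_parser = argparse.ArgumentParser()
toy_parser.add_argument("--fakeSmoothingMode", type=str, default="hybrid",
                        choices=["binned", "fakerate", "hybrid", "full"])
toy_parser.add_argument("--fakeSmoothingOrder", type=int, default=3)

toy_args = toy_parser.parse_args([])  # no flags: defaults apply
print(toy_args.fakeSmoothingMode, toy_args.fakeSmoothingOrder)  # hybrid 3

toy_args = toy_parser.parse_args(["--fakeSmoothingMode", "fakerate"])
print(toy_args.fakeSmoothingMode)  # fakerate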

@@ -293,7 +293,7 @@ def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, genvar=None, x
datagroups.fakerate_axes=args.fakerateAxes
datagroups.set_histselectors(
datagroups.getNames(), inputBaseName, mode=args.fakeEstimation,
-smoothing_mode=args.fakeSmoothingMode, smoothingOrderFakerate=args.fakeSmoothingOrder,
+smoothing_mode=args.fakeSmoothingMode, smoothingOrderSpectrum=args.fakeSmoothingOrder,
mcCorr=args.fakeMCCorr,
integrate_x="mt" not in fitvar,
simultaneousABCD=simultaneousABCD, forceGlobalScaleFakes=args.forceGlobalScaleFakes,
@@ -377,7 +377,7 @@ def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, genvar=None, x
pseudodataGroups.fakerate_axes=args.fakerateAxes
pseudodataGroups.set_histselectors(
pseudodataGroups.getNames(), inputBaseName, mode=args.fakeEstimation,
-smoothing_mode=args.fakeSmoothingMode, smoothingOrderFakerate=args.fakeSmoothingOrder,
+smoothing_mode=args.fakeSmoothingMode, smoothingOrderSpectrum=args.fakeSmoothingOrder,
mcCorr=args.fakeMCCorr,
integrate_x="mt" not in fitvar,
simultaneousABCD=simultaneousABCD, forceGlobalScaleFakes=args.forceGlobalScaleFakes,
@@ -394,7 +394,7 @@ def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, genvar=None, x
pseudodataGroups.copyGroup("QCD", "QCDTruth")
pseudodataGroups.set_histselectors(
pseudodataGroups.getNames(), inputBaseName, mode=args.fakeEstimation, fake_processes=["QCD",],
-smoothing_mode=args.fakeSmoothingMode, smoothingOrderFakerate=args.fakeSmoothingOrder,
+smoothing_mode=args.fakeSmoothingMode, smoothingOrderSpectrum=args.fakeSmoothingOrder,
mcCorr=args.fakeMCCorr,
simultaneousABCD=simultaneousABCD, forceGlobalScaleFakes=args.forceGlobalScaleFakes,
)
@@ -743,18 +743,15 @@ def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]):
cardTool.addLnNSystematic("CMS_background", processes=["Other"], size=1.15, group="CMS_background", splitGroup = {"experiment": ".*"},)
cardTool.addLnNSystematic("lumi", processes=['MCnoQCD'], size=1.017 if lowPU else 1.012, group="luminosity", splitGroup = {"experiment": ".*"},)

-if cardTool.getFakeName() != "QCD" and cardTool.getFakeName() in datagroups.groups.keys() and not xnorm and (args.fakeSmoothingMode != "binned" or (args.fakeEstimation in ["extrapolate"] and "mt" in fitvar)):
-
-# add stat uncertainty from QCD MC nonclosure to smoothing parameter covariance
-hist_fake = datagroups.results["QCDmuEnrichPt15PostVFP"]["output"]["unweighted"].get()
+if (
+(cardTool.getFakeName() != "QCD" or args.qcdProcessName=="QCD")
+and cardTool.getFakeName() in datagroups.groups.keys()
+and not xnorm
+and (args.fakeSmoothingMode != "binned" or (args.fakeEstimation in ["extrapolate"] and "mt" in fitvar))
+):

fakeselector = cardTool.datagroups.groups[cardTool.getFakeName()].histselector

-_0, _1, params, cov, _chi2, _ndf = fakeselector.calculate_fullABCD_smoothed(hist_fake, auxiliary_info=True)
-_0, _1, params_d, cov_d, _chi2_d, _ndf_d = fakeselector.calculate_fullABCD_smoothed(hist_fake, auxiliary_info=True, signal_region=True)
-
-fakeselector.external_cov = cov + cov_d
-
syst_axes = ["eta", "charge"] if (args.fakeSmoothingMode != "binned" or args.fakeEstimation not in ["extrapolate"]) else ["eta", "pt", "charge"]
info=dict(
name=inputBaseName,
@@ -766,13 +763,24 @@ def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]):
applySelection=False, # don't apply selection, all regions will be needed for the action
action=fakeselector.get_hist,
systAxes=[f"_{x}" for x in syst_axes if x in args.fakerateAxes]+["_param", "downUpVar"])
subgroup = f"{cardTool.getFakeName()}Rate"
cardTool.addSystematic(**info,
rename=subgroup,
splitGroup = {subgroup: f".*", "experiment": ".*"},
systNamePrepend=subgroup,
actionArgs=dict(variations_smoothing=True),
)
if args.fakeSmoothingMode in ["hybrid", "full"]:
subgroup = f"{cardTool.getFakeName()}Smoothing"
cardTool.addSystematic(**info,
rename=subgroup,
splitGroup = {subgroup: f".*", "experiment": ".*"},
systNamePrepend=subgroup,
actionArgs=dict(variations_smoothing=True),
)

if args.fakeSmoothingMode in ["fakerate", "hybrid"]:
subgroup = f"{cardTool.getFakeName()}Rate"
cardTool.addSystematic(**info,
rename=subgroup,
splitGroup = {subgroup: f".*", "experiment": ".*"},
systNamePrepend=subgroup,
actionArgs=dict(variations_frf=True),
)
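
The "Smoothing" nuisances above propagate the statistical uncertainty of the smoothing fit. A common recipe for building such variations is one up/down shift per eigenvector of the fitted-parameter covariance (an illustrative assumption here, not code taken from this repository):

import numpy as np

# toy polynomial coefficients and covariance from a smoothing fit
params = np.array([2.0, -1.0, 0.3, -0.1])
cov = np.diag([0.04, 0.09, 0.01, 0.0025])

# one up/down variation per eigenvector of the covariance
evals, evecs = np.linalg.eigh(cov)
variations = [params + sign * np.sqrt(val) * vec
              for val, vec in zip(evals, evecs.T)
              for sign in (+1.0, -1.0)]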

if args.fakeEstimation in ["extended2D",] and args.fakeSmoothingMode != "full":
subgroup = f"{cardTool.getFakeName()}Shape"
cardTool.addSystematic(**info,
@@ -782,14 +790,22 @@ def fake_nonclosure(h, axesToDecorrNames, *args, **kwargs):
actionArgs=dict(variations_scf=True),
)

if args.fakeSmoothingMode == "full":
# add nominal nonclosure of QCD MC as single systematic
def fake_nonclosure(h, axesToDecorrNames, *args, **kwargs):
# apply QCD MC nonclosure by adding parameters (assumes log space, e.g. in full smoothing)
fakeselector.external_params = params_d - params
if args.fakeSmoothingMode in ["hybrid", "full"] and args.fakeSmoothingOrder > 0:
# add systematic of explicit parameter variation
fakeSmoothingOrder = args.fakeSmoothingOrder
def fake_nonclosure(h, axesToDecorrNames, param_idx=1, variation_size=0.5, *args, **kwargs):
# apply variation by adding parameter value (assumes log space, e.g. in full smoothing)
fakeselector.spectrum_regressor.external_params = np.zeros(fakeSmoothingOrder + 1)
fakeselector.spectrum_regressor.external_params[param_idx] = variation_size
hvar = fakeselector.get_hist(h, *args, **kwargs)
# reset external parameters
fakeselector.external_params = None
fakeselector.spectrum_regressor.external_params = None

hnom = fakeselector.get_hist(h, *args, **kwargs)

# normalize variation histogram to have the same integral as nominal
hScale = hh.divideHists(hnom[{"pt":hist.sum}], hvar[{"pt":hist.sum}])
hvar = hh.multiplyHists(hvar, hScale)

if len(axesToDecorrNames) == 0:
# inclusive
data = hvar.values(flow=True)[...,np.newaxis]
)
else:
-hnom = fakeselector.get_hist(h, *args, **kwargs)
hvar = syst_tools.decorrelateByAxes(hvar, hnom, axesToDecorrNames)

return hvar
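
For intuition about the else branch: syst_tools.decorrelateByAxes presumably splits one correlated variation into independent per-bin variations along the given axes. A toy numpy sketch of that idea (an assumption about its behavior, not the actual implementation):

import numpy as np

# one correlated variation over 4 eta bins ...
nominal = np.array([10.0, 12.0, 11.0, 9.0])
varied = np.array([11.0, 12.5, 11.8, 9.4])

# ... becomes 4 independent variations, each moving a single eta bin
decorr = np.tile(nominal, (len(nominal), 1))
for i in range(len(nominal)):
    decorr[i, i] = varied[i]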

for axesToDecorrNames in [[], ["eta"]]:
subgroup = f"{cardTool.getFakeName()}QCDMCNonclosure"
cardTool.addSystematic(
name=inputBaseName,
group="Fake",
rename=subgroup+ (f"_{'_'.join(axesToDecorrNames)}" if len(axesToDecorrNames) else ""),
splitGroup = {subgroup: f".*", "experiment": ".*"},
systNamePrepend=subgroup,
processes=cardTool.getFakeName(),
noConstraint=False,
mirror=True,
scale=1,
applySelection=False, # don't apply selection, external parameters need to be added
action=fake_nonclosure,
actionArgs=dict(axesToDecorrNames=axesToDecorrNames),
systAxes=["var"] if len(axesToDecorrNames)==0 else [f"{n}_decorr" for n in axesToDecorrNames],
)
+for axesToDecorrNames in [[], ]:
+for idx, mag in [(1,0.2),(2,0.2),(3,0.2)]:
+subgroup = f"{cardTool.getFakeName()}Param{idx}"
+cardTool.addSystematic(
+name=inputBaseName,
+group="Fake",
+rename=subgroup+ (f"_{'_'.join(axesToDecorrNames)}" if len(axesToDecorrNames) else ""),
+splitGroup = {subgroup: f".*", "experiment": ".*"},
+systNamePrepend=subgroup,
+processes=cardTool.getFakeName(),
+noConstraint=False,
+mirror=True,
+scale=1,
+applySelection=False, # don't apply selection, external parameters need to be added
+action=fake_nonclosure,
+actionArgs=dict(axesToDecorrNames=axesToDecorrNames, param_idx=idx, variation_size=mag),
+systAxes=["var"] if len(axesToDecorrNames)==0 else [f"{n}_decorr" for n in axesToDecorrNames],
+)
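
Each "Param{idx}" nuisance above shifts one coefficient of the smoothing polynomial in log space and, via fake_nonclosure, renormalizes the varied spectrum to the nominal integral. A self-contained numpy sketch of that recipe with toy numbers (the binning and coefficients are illustrative assumptions):

import numpy as np

pt = np.linspace(26.0, 56.0, 30)
x = (pt - pt.min()) / (pt.max() - pt.min())

params = np.array([2.0, -1.0, 0.3, -0.1])  # toy smoothing coefficients
nominal = np.exp(np.polynomial.polynomial.polyval(x, params))

param_idx, variation_size = 1, 0.2  # cf. the (idx, mag) loop above
varied_params = params.copy()
varied_params[param_idx] += variation_size
varied = np.exp(np.polynomial.polynomial.polyval(x, varied_params))

# renormalize so only the shape changes (cf. hScale in fake_nonclosure)
varied *= nominal.sum() / varied.sum()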

if not args.noEfficiencyUnc:

26 changes: 19 additions & 7 deletions scripts/plotting/controlPlotsHDF5.py
@@ -35,6 +35,8 @@
help="Select specific bins of the selectionAxis e.g. '0 1' to select the first bin of the first axis and second bin of the second axis")
parser.add_argument("--hists", type=str, nargs='*', default=None,
help="List of hists to plot; dash separated for unrolled hists")
parser.add_argument("--normToData", action='store_true', help="Normalize MC to data")
parser.add_argument("--dataName", type=str, default="Data", help="Data name for plot labeling")

# variations
parser.add_argument("--varName", type=str, nargs='*', default=[], help="Name of variation hist")
@@ -123,6 +125,13 @@ def make_plot(hists_proc, hist_data, hists_syst_up, hists_syst_dn, axes_names,
else:
hists_pred = [hh.sumHists(h_stack)]

+if args.normToData:
+scale = h_data.values().sum()/hists_pred[0].values().sum()
+h_stack = [hh.scaleHist(h, scale) for h in h_stack]
+hists_pred[0] = hh.scaleHist(hists_pred[0], scale)
+hists_syst_up = [hh.scaleHist(h, scale) for h in hists_syst_up]
+hists_syst_dn = [hh.scaleHist(h, scale) for h in hists_syst_dn]
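
What --normToData does, sketched directly with the hist library (hh.scaleHist is assumed to be a scalar multiply; plain * is used here instead):

import hist

h_data = hist.Hist.new.Reg(10, 0, 10, name="x").Weight()
h_mc = hist.Hist.new.Reg(10, 0, 10, name="x").Weight()
h_data.fill(x=[1, 2, 3, 4])
h_mc.fill(x=[1, 2, 2, 3, 5])

# scale MC so its integral matches data (here 4/5 = 0.8)
scale = h_data.values().sum() / h_mc.values().sum()
h_mc_scaled = h_mc * scale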

# loop over all processes if plots for each process is requested, or inclusive otherwise
for i, h_pred in enumerate(hists_pred):

@@ -167,7 +176,7 @@ def make_plot(hists_proc, hist_data, hists_syst_up, hists_syst_dn, axes_names,
yerr=True,
histtype="errorbar",
color="black",
label="Data",
label=args.dataName,
binwnorm=binwnorm,
ax=ax1,
alpha=1.,
@@ -252,11 +261,14 @@ def make_plot(hists_proc, hist_data, hists_syst_up, hists_syst_dn, axes_names,
if selections is not None:
for i, (key, idx) in enumerate(selections.items()):
lo, hi = selection_edges[i]
-label = styles.xlabels[key].replace(" (GeV)","")
-if lo != None:
-label = f"{lo} < {label}"
-if hi != None:
-label = f"{label} < {hi}"
+if key=="charge":
+label = f"charge = {'-1' if idx==0 else '+1'}"
+else:
+label = styles.xlabels[key].replace(" (GeV)","")
+if lo != None:
+label = f"{lo} < {label}"
+if hi != None:
+label = f"{label} < {hi}"

ax1.text(0.05, 0.96-i*0.08, label, horizontalalignment='left', verticalalignment='top', transform=ax1.transAxes,
fontsize=20*args.scaleleg*scale)
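
A standalone rendering of the selection-label logic above, with a toy dict standing in for styles.xlabels:

xlabels = {"mt": "mT (GeV)", "charge": "charge"}  # stand-in for styles.xlabels

def selection_label(key, idx, lo=None, hi=None):
    if key == "charge":
        return f"charge = {'-1' if idx == 0 else '+1'}"
    label = xlabels[key].replace(" (GeV)", "")
    if lo is not None:
        label = f"{lo} < {label}"
    if hi is not None:
        label = f"{label} < {hi}"
    return label

print(selection_label("mt", 0, lo=0, hi=40))  # 0 < mT < 40
print(selection_label("charge", 1))           # charge = +1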
Expand All @@ -274,7 +286,7 @@ def make_plot(hists_proc, hist_data, hists_syst_up, hists_syst_dn, axes_names,
# offset_text.set_position((-0.08,1.02))

if args.cmsDecor:
-lumi = float(f"{channel_info['lumi']:.3g}") if not density else None
+lumi = float(f"{channel_info['lumi']:.3g}") if not density and args.dataName=="Data" else None

hep.cms.label(ax=ax1, lumi=lumi, fontsize=legtext_size*scale,
label= args.cmsDecor, data=hist_data is not None)
6 changes: 3 additions & 3 deletions scripts/plotting/makeDataMCStackPlot.py
@@ -43,10 +43,10 @@
parser.add_argument("--presel", type=str, nargs="*", default=[], help="Specify custom selections on input histograms to integrate some axes, giving axis name and min,max (e.g. '--presel pt=ptmin,ptmax' ) or just axis name for bool axes")
parser.add_argument("--normToData", action='store_true', help="Normalize MC to data")
parser.add_argument("--fakeEstimation", type=str, help="Set the mode for the fake estimation", default="extended1D", choices=["simple", "extrapolate", "extended1D", "extended2D"])
parser.add_argument("--fakeSmoothingMode", type=str, default="full", choices=["binned", "fakerate", "full"], help="Smoothing mode for fake estimate.")
parser.add_argument("--fakeSmoothingMode", type=str, default="hybrid", choices=["binned", "fakerate", "hybrid", "full"], help="Smoothing mode for fake estimate.")
parser.add_argument("--fakeMCCorr", type=str, default=[None], nargs="*", choices=["none", "pt", "eta", "mt"], help="axes to apply nonclosure correction from QCD MC. Leave empty for inclusive correction, use'none' for no correction")
parser.add_argument("--forceGlobalScaleFakes", default=None, type=float, help="Scale the fakes by this factor (overriding any custom one implemented in datagroups.py in the fakeSelector).")
parser.add_argument("--fakeSmoothingOrder", type=int, default=3, help="Order of the polynomial for the smoothing of the fake rate or full prediction, depending on the smoothing mode")
parser.add_argument("--fakeSmoothingOrder", type=int, default=3, help="Order of the polynomial for the smoothing of the application region or full prediction, depending on the smoothing mode")
parser.add_argument("--fakerateAxes", nargs="+", help="Axes for the fakerate binning", default=["eta","pt","charge"])
parser.add_argument("--fineGroups", action='store_true', help="Plot each group as a separate process, otherwise combine groups based on predefined dictionary")

@@ -146,7 +146,7 @@ def padArray(ref, matchLength):
datasets,
args.baseName,
smoothing_mode=args.fakeSmoothingMode,
-smoothingOrderFakerate=args.fakeSmoothingOrder,
+smoothingOrderSpectrum=args.fakeSmoothingOrder,
integrate_x=all("mt" not in x.split("-") for x in args.hists),
mode=args.fakeEstimation,
forceGlobalScaleFakes=args.forceGlobalScaleFakes,
[Diffs for the remaining 7 changed files are not shown here.]
