Updates to implementation of MiNNLO muR/muF polynomial variations (also to use them for unfolding and theory agnostic for OOA) #508

Open · wants to merge 3 commits into base: main
11 changes: 10 additions & 1 deletion scripts/combine/setupCombine.py
@@ -137,6 +137,7 @@ def make_parser(parser=None):
parser.add_argument("--forceConstrainMass", action='store_true', help="force mass to be constrained in fit")
parser.add_argument("--decorMassWidth", action='store_true', help="remove width variations from mass variations")
parser.add_argument("--muRmuFPolVar", action="store_true", help="Use polynomial variations (like in theoryAgnosticPolVar) instead of binned variations for muR and muF (of course in setupCombine these are still constrained nuisances)")
parser.add_argument("--muRmuFFullyCorr", action="store_true", help="Use the fully correlated muR/muF variations derived from theoryAgnosticPolVar polynomial variations instead of the binned ones (by default you take the binned fully correlated variations even if you are using the polynomial variations for muR/muF, they just take care of the \"uncorrelated\" part) (this option only works with --muRmuFPolVar, it doesn't work on its own)")
parser = make_subparsers(parser)

return parser
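Since the help text states that `--muRmuFFullyCorr` only works together with `--muRmuFPolVar`, a parser-level guard could reject the standalone flag early. A minimal sketch, not part of this diff:

```python
# Hypothetical consistency check (not added by this PR): fail fast when
# --muRmuFFullyCorr is passed without --muRmuFPolVar, instead of silently
# ignoring the flag later on.
def validate_args(args):
    if args.muRmuFFullyCorr and not args.muRmuFPolVar:
        raise ValueError("--muRmuFFullyCorr requires --muRmuFPolVar")
    return args
```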
@@ -405,6 +406,9 @@ def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, genvar=None, x
def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]):
return any(name.startswith(init) for init in startsWith) and all(excl not in name for excl in excludeMatch)

+ def assertSampleWithOOA(name, startsWith=["W", "Z"], excludeMatch=[]):
+ return any(name.startswith(init) for init in startsWith) and name.endswith("OOA") and all(excl not in name for excl in excludeMatch)

Collaborator:

It should be possible to reuse the assertSample function by adding arguments that require the presence of a string, or require that it appears at the end (its mere presence is probably sufficient in this case).
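A sketch of that suggestion, for illustration only (the `endsWith` argument is hypothetical, not part of this PR):

```python
# Hypothetical generalization of assertSample: endsWith=None keeps the current
# behavior, endsWith="OOA" would reproduce assertSampleWithOOA.
def assertSample(name, startsWith=["W", "Z"], excludeMatch=[], endsWith=None):
    return (any(name.startswith(init) for init in startsWith)
            and (endsWith is None or name.endswith(endsWith))
            and all(excl not in name for excl in excludeMatch))
```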

dibosonMatch = ["WW", "WZ", "ZZ"]
WMatch = ["W"] # TODO: the name of out-of-acceptance might be changed at some point, maybe to WmunuOutAcc, so W will match it as well (and can exclude it using "OutAcc" if needed)
ZMatch = ["Z"]
@@ -432,6 +436,11 @@ def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]):
cardTool.addProcessGroup("nonsignal_samples_noOutAcc", lambda x: assertSample(x, startsWith=nonSignalMatch, excludeMatch=[*dibosonMatch, "tau", "OOA"]))
cardTool.addProcessGroup("signal_samples_inctau_noOutAcc", lambda x: assertSample(x, startsWith=signalMatch, excludeMatch=[*dibosonMatch, "OOA"]))
cardTool.addProcessGroup("nonsignal_samples_inctau_noOutAcc", lambda x: assertSample(x, startsWith=nonSignalMatch, excludeMatch=[*dibosonMatch, "OOA"]))
+ # FIX for theoryAgnosticPolVar when using MiNNLO polynomial variations (we would still need these for the OOA)
+ cardTool.addProcessGroup("signal_samples_OOA", lambda x: assertSampleWithOOA(x, startsWith=signalMatch, excludeMatch=[*dibosonMatch, "tau"]))
+ cardTool.addProcessGroup("nonsignal_samples_OOA", lambda x: assertSampleWithOOA(x, startsWith=nonSignalMatch, excludeMatch=[*dibosonMatch, "tau"]))
+ cardTool.addProcessGroup("signal_samples_inctau_OOA", lambda x: assertSampleWithOOA(x, startsWith=signalMatch, excludeMatch=[*dibosonMatch]))
+ cardTool.addProcessGroup("nonsignal_samples_inctau_OOA", lambda x: assertSampleWithOOA(x, startsWith=nonSignalMatch, excludeMatch=[*dibosonMatch]))

if not (isTheoryAgnostic or isUnfolding) :
logger.info(f"All MC processes {cardTool.procGroups['MCnoQCD']}")
@@ -588,7 +597,7 @@ def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]):
for g in cardTool.procGroups["signal_samples"] for m in cardTool.datagroups.groups[g].members},
)

- if args.muRmuFPolVar and not isTheoryAgnosticPolVar:
+ if args.muRmuFPolVar and not isPoiAsNoi:
Collaborator:
Is this change correct? Right now we always use isPoiAsNoi = True (except perhaps for one unfolding case), so this condition would never be entered.

muRmuFPolVar_helper = combine_theoryAgnostic_helper.TheoryAgnosticHelper(cardTool, externalArgs=args)
muRmuFPolVar_helper.configure_polVar(label,
passSystToFakes,
7 changes: 2 additions & 5 deletions scripts/histmakers/mw_with_mu_eta_pt.py
@@ -649,23 +649,20 @@ def build_graph(df, dataset):
# End graph here only for standard theory agnostic analysis, otherwise use same loop as traditional analysis
return results, weightsum

- if isWorZ and not hasattr(dataset, "out_of_acceptance"):
+ if isWorZ: # this needs to be created also for out-of-acceptance, when present (e.g. unfolding, theory-agnostic, etc.)
theoryAgnostic_helpers_cols = ["qtOverQ", "absYVgen", "chargeVgen", "csSineCosThetaPhigen", "nominal_weight"]
# assume to have same coeffs for plus and minus (no reason for it not to be the case)
if dataset.name == "WplusmunuPostVFP" or dataset.name == "WplustaunuPostVFP":
helpers_class = muRmuFPolVar_helpers_plus
process_name = "W"
elif dataset.name == "WminusmunuPostVFP" or dataset.name == "WminustaunuPostVFP":
helpers_class = muRmuFPolVar_helpers_minus
process_name = "W"
elif dataset.name == "ZmumuPostVFP" or dataset.name == "ZtautauPostVFP":
helpers_class = muRmuFPolVar_helpers_Z
process_name = "Z"
for coeffKey in helpers_class.keys():
logger.debug(f"Creating muR/muF histograms with polynomial variations for {coeffKey}")
helperQ = helpers_class[coeffKey]
df = df.Define(f"muRmuFPolVar_{coeffKey}_tensor", helperQ, theoryAgnostic_helpers_cols)
- noiAsPoiWithPolHistName = Datagroups.histName("nominal", syst=f"muRmuFPolVar{process_name}_{coeffKey}")
+ noiAsPoiWithPolHistName = Datagroups.histName("nominal", syst=f"muRmuFPolVar_{coeffKey}")
results.append(df.HistoBoost(noiAsPoiWithPolHistName, nominal_axes, [*nominal_cols, f"muRmuFPolVar_{coeffKey}_tensor"], tensor_axes=helperQ.tensor_axes, storage=hist.storage.Double()))

if not args.onlyMainHistograms:
5 changes: 2 additions & 3 deletions scripts/histmakers/mz_wlike_with_mu_eta_pt.py
@@ -393,17 +393,16 @@ def build_graph(df, dataset):
nominal = df.HistoBoost("nominal", axes, [*cols, "nominal_weight"])
results.append(nominal)

- if isZ and not hasattr(dataset, "out_of_acceptance"):
+ if isZ: # this needs to be created also for out-of-acceptance, when present (e.g. unfolding, theory-agnostic, etc.)
theoryAgnostic_helpers_cols = ["qtOverQ", "absYVgen", "chargeVgen", "csSineCosThetaPhigen", "nominal_weight"]
# assume to have same coeffs for plus and minus (no reason for it not to be the case)
if dataset.name == "ZmumuPostVFP" or dataset.name == "ZtautauPostVFP":
helpers_class = muRmuFPolVar_helpers_Z
process_name = "Z"
for coeffKey in helpers_class.keys():
logger.debug(f"Creating muR/muF histograms with polynomial variations for {coeffKey}")
helperQ = helpers_class[coeffKey]
df = df.Define(f"muRmuFPolVar_{coeffKey}_tensor", helperQ, theoryAgnostic_helpers_cols)
- noiAsPoiWithPolHistName = Datagroups.histName("nominal", syst=f"muRmuFPolVar{process_name}_{coeffKey}")
+ noiAsPoiWithPolHistName = Datagroups.histName("nominal", syst=f"muRmuFPolVar_{coeffKey}")
results.append(df.HistoBoost(noiAsPoiWithPolHistName, nominal_axes, [*nominal_cols, f"muRmuFPolVar_{coeffKey}_tensor"], tensor_axes=helperQ.tensor_axes, storage=hist.storage.Double()))

if not args.noRecoil and args.recoilUnc:
2 changes: 1 addition & 1 deletion utilities/common.py
@@ -360,7 +360,7 @@ def __call__(self, parser, namespace, values, option_string=None):
parser.add_argument("--noScaleToData", action="store_true", help="Do not scale the MC histograms with xsec*lumi/sum(gen weights) in the postprocessing step")
parser.add_argument("--aggregateGroups", type=str, nargs="*", default=["Diboson", "Top"], help="Sum up histograms from members of given groups in the postprocessing step")
parser.add_argument("--muRmuFPolVarFilePath", type=str, default=f"{data_dir}/MiNNLOmuRmuFPolVar/", help="Path where input files are stored")
parser.add_argument("--muRmuFPolVarFileTag", type=str, default="x0p50_y4p00_ConstrPol5ExtYdep_Trad", choices=["x0p50_y4p00_ConstrPol5ExtYdep_Trad","x0p50_y4p00_ConstrPol5Ext_Trad"],help="Tag for input files")
parser.add_argument("--muRmuFPolVarFileTag", type=str, default="x0p50_y4p00_Fix_Trad", choices=["x0p50_y4p00_Fix_Trad","x0p50_y4p00_FixYdep_Trad"],help="Tag for input files")
parser.add_argument("--nToysMC", type=int, help="random toys for data and MC", default=-1)
parser.add_argument("--varianceScalingForToys", type=int, default=1, help="Scaling of variance for toys (effective mc statistics corresponds to 1./scaling)")
parser.add_argument("--randomSeedForToys", type=int, default=0, help="random seed for toys")
2 changes: 1 addition & 1 deletion wremnants-data
Submodule wremnants-data updated from a31cce to dfb2ca
82 changes: 50 additions & 32 deletions wremnants/combine_theoryAgnostic_helper.py
@@ -56,35 +56,51 @@ def add_theoryAgnostic_polVar_uncertainty(self):
#splitGroup={f"{groupName}_{coeffKey}" : f"{groupName}_{coeffKey}"}
)

- def add_muRmuF_polVar_uncertainty(self):
+ def add_muRmuF_polVar_uncertainty(self, isTheoryAgnostic):
coeffs = [f"A{i}" for i in range(8)]
signalGroupName = "muRmuFPolVarW" if self.label == "W" else "muRmuFPolVarZ"
nonSignalGroupName = "muRmuFPolVarZ" if self.label == "W" else "muRmuFPolVarW"
- for coeffKey in coeffs:
- self.card_tool.addSystematic(f"{signalGroupName}_{coeffKey}",
- group=signalGroupName,
- mirror=False,
- passToFakes=self.passSystToFakes,
- processes=["signal_samples_inctau_noOutAcc" if self.separateOutOfAccSignal else "signal_samples_inctau"],
- baseName=f"{signalGroupName}_{coeffKey}_",
- noConstraint=False,
- systAxes=["nPolVarSyst", "downUpVar"],
- labelsByAxis=["v", "downUpVar"],
- #splitGroup={f"{groupName}_{coeffKey}" : f"{groupName}_{coeffKey}"}
- )

- for coeffKey in coeffs:
- self.card_tool.addSystematic(f"{nonSignalGroupName}_{coeffKey}",
- group=nonSignalGroupName,
- mirror=False,
- passToFakes=self.passSystToFakes,
- processes=["nonsignal_samples_inctau_noOutAcc" if self.separateOutOfAccSignal else "nonsignal_samples_inctau"],
- baseName=f"{nonSignalGroupName}_{coeffKey}_",
- noConstraint=False,
- systAxes=["nPolVarSyst", "downUpVar"],
- labelsByAxis=["v", "downUpVar"],
- #splitGroup={f"{groupName}_{coeffKey}" : f"{groupName}_{coeffKey}"}
- )
+ signalProcesses = "signal_samples_inctau_OOA" if isTheoryAgnostic else "signal_samples_inctau"
+ nonsignalProcesses = "nonsignal_samples_inctau_OOA" if isTheoryAgnostic else "nonsignal_samples_inctau"
+ ProcessNames = [signalGroupName,nonSignalGroupName]
+ Processes = [signalProcesses,nonsignalProcesses]
+ if self.args.muRmuFFullyCorr:
+ signal_expanded_samples = self.card_tool.getProcNames([signalProcesses])
+ nonsignal_expanded_samples = self.card_tool.getProcNames([nonsignalProcesses])
+ func = syst_tools.muRmuFFullyCorrelatedVariations
+ signal_preop_map = {proc : func for proc in signal_expanded_samples}
+ nonsignal_preop_map = {proc : func for proc in nonsignal_expanded_samples}
+ preop_args = {}
+ PreOpMaps = [signal_preop_map,nonsignal_preop_map]
+ for Index in range(0,len(ProcessNames)):
+ for coeffKey in coeffs:
+ self.card_tool.addSystematic(f"muRmuFPolVar_{coeffKey}",
+ group=ProcessNames[Index],
+ mirror=False,
+ passToFakes=self.passSystToFakes,
+ processes=[Processes[Index]],
+ baseName=f"{ProcessNames[Index]}_{coeffKey}_",
+ noConstraint=False,
+ systAxes=["nPolVarSyst", "downUpVar"],
+ labelsByAxis=["v", "downUpVar"],
+ rename=f"{ProcessNames[Index]}_{coeffKey}",
+ #splitGroup={f"{groupName}_{coeffKey}" : f"{groupName}_{coeffKey}"}
+ )
+ if self.args.muRmuFFullyCorr:
+ self.card_tool.addSystematic(f"muRmuFPolVar_{coeffKey}",
+ preOpMap=PreOpMaps[Index],
+ preOpArgs=preop_args,
+ group=ProcessNames[Index],
+ mirror=False,
+ passToFakes=self.passSystToFakes,
+ processes=[Processes[Index]],
+ baseName=f"{ProcessNames[Index]}_{coeffKey}_FullyCorr_",
+ noConstraint=False,
+ systAxes=["nPolVarSyst", "downUpVar"],
+ labelsByAxis=["v", "downUpVar"],
+ rename=f"{ProcessNames[Index]}_{coeffKey}_FullyCorr",
+ #splitGroup={f"{groupName}_{coeffKey}" : f"{groupName}_{coeffKey}"}
+ )
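As a side note, the paired `ProcessNames`/`Processes`/`PreOpMaps` lists could be walked with `zip` instead of a manual index; a minimal sketch of the same pairing, assuming `PreOpMaps` were always defined (e.g. as `[None, None]` when `--muRmuFFullyCorr` is off):

```python
# Equivalent pairing without range(0, len(...)); behavior unchanged.
ProcessNames = ["muRmuFPolVarW", "muRmuFPolVarZ"]
Processes = ["signal_samples_inctau", "nonsignal_samples_inctau"]
PreOpMaps = [None, None]  # filled with real preOp maps only when --muRmuFFullyCorr is set
for groupName, procs, preOpMap in zip(ProcessNames, Processes, PreOpMaps):
    print(groupName, procs, preOpMap is not None)
```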


def add_theoryAgnostic_normVar_uncertainty(self, flow=True):
@@ -195,9 +211,11 @@ def add_theoryAgnostic_normVar_uncertainty(self, flow=True):
)

def add_theoryAgnostic_uncertainty(self):
- if self.args.analysisMode == "theoryAgnosticPolVar":
- self.add_theoryAgnostic_polVar_uncertainty()
- elif self.args.muRmuFPolVar == True:
- self.add_muRmuF_polVar_uncertainty()
- else:
- self.add_theoryAgnostic_normVar_uncertainty()
+ isTheoryAgnostic = self.args.analysisMode in ["theoryAgnosticNormVar", "theoryAgnosticPolVar"]
+ if isTheoryAgnostic:
+ if self.args.analysisMode == "theoryAgnosticPolVar":
+ self.add_theoryAgnostic_polVar_uncertainty()
+ else:
+ self.add_theoryAgnostic_normVar_uncertainty()
+ if self.args.muRmuFPolVar == True:
+ self.add_muRmuF_polVar_uncertainty(isTheoryAgnostic)
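The resulting dispatch can be summarized with a small sketch (the function and the string labels are illustrative; the real method calls into the card tool):

```python
# Sketch of the new dispatch in add_theoryAgnostic_uncertainty: theory-agnostic
# NOIs and the muR/muF polynomial variations are now added independently.
def planned_uncertainties(analysis_mode: str, muRmuFPolVar: bool) -> list:
    out = []
    if analysis_mode in ["theoryAgnosticNormVar", "theoryAgnosticPolVar"]:
        out.append("polVar" if analysis_mode == "theoryAgnosticPolVar" else "normVar")
    if muRmuFPolVar:
        out.append("muRmuF_polVar")  # uses the OOA process groups only in theory-agnostic modes
    return out

assert planned_uncertainties("unfolding", True) == ["muRmuF_polVar"]
assert planned_uncertainties("theoryAgnosticPolVar", True) == ["polVar", "muRmuF_polVar"]
```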
3 changes: 2 additions & 1 deletion wremnants/combine_theory_helper.py
@@ -155,7 +155,8 @@ def add_resum_unc(self, scale=1):

self.add_minnlo_scale_uncertainty(sample_group, extra_name = "fine", rebin_pt=fine_pt_binning, helicities_to_exclude=helicities_to_exclude)

self.add_minnlo_scale_uncertainty(sample_group, extra_name = "inclusive", rebin_pt=[common.ptV_binning[0], common.ptV_binning[-1]], helicities_to_exclude=helicities_to_exclude, scale = scale_inclusive)
if not (self.args.muRmuFPolVar and self.args.muRmuFFullyCorr):
self.add_minnlo_scale_uncertainty(sample_group, extra_name = "inclusive", rebin_pt=[common.ptV_binning[0], common.ptV_binning[-1]], helicities_to_exclude=helicities_to_exclude, scale = scale_inclusive)

# additional uncertainty for effect of shower and intrinsic kt on angular coeffs
self.add_helicity_shower_kt_uncertainty()
18 changes: 18 additions & 0 deletions wremnants/syst_tools.py
@@ -361,6 +361,24 @@ def hist_to_variations(hist_in, gen_axes = [], sum_axes = [], rebin_axes=[], reb

return variation_hist

+ def muRmuFFullyCorrelatedVariations(hist_in):

+ if hist_in.name is None:
+ out_name = "hist_variations"
+ else:
+ out_name = hist_in.name + "_variations"

+ variation_hist = hist.Hist(*hist_in.axes, name = out_name, storage = hist.storage.Double())
+ variation_hist = variation_hist[...,0:1,:]

+ for i in range(len(hist_in.axes[len(hist_in.axes)-2])):
Collaborator @cippy (Aug 10, 2024):
Once you select the axis by index, the number of bins for that axis can be obtained with hist.axes[id].size (or .extent if you want to include overflow bins when they exist; .size always returns the number of in-acceptance bins).

In general, is it guaranteed that the axis you want is always the second from last, rather than, say, the n-th from the left? It might be safer to select the axis by name, which is more likely to stay stable (and if the name is eventually changed elsewhere, it won't be found here and the code will simply raise an error).

+ variation_hist.values(flow=True)[...,0,0] = variation_hist.values(flow=True)[...,0,0] + hist_in.values(flow=True)[...,i,0] - (hist_in.values(flow=True)[...,0,0]+hist_in.values(flow=True)[...,0,1])/2 # this average which is subtracted is the nominal histogram (by construction, it's directly related to how the weights are defined)
+ variation_hist.values(flow=True)[...,0,1] = variation_hist.values(flow=True)[...,0,1] + hist_in.values(flow=True)[...,i,1] - (hist_in.values(flow=True)[...,0,0]+hist_in.values(flow=True)[...,0,1])/2
+ variation_hist.values(flow=True)[...,0,0] = variation_hist.values(flow=True)[...,0,0] + (hist_in.values(flow=True)[...,0,0]+hist_in.values(flow=True)[...,0,1])/2
+ variation_hist.values(flow=True)[...,0,1] = variation_hist.values(flow=True)[...,0,1] + (hist_in.values(flow=True)[...,0,0]+hist_in.values(flow=True)[...,0,1])/2

+ return variation_hist
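Following the review suggestion above, the loop bound could come from `.size` with the axis selected by name; a sketch under the assumption that the variation axis is called "nPolVarSyst" (the name used in the `systAxes` of `combine_theoryAgnostic_helper.py`) and sits next to a trailing Down/Up axis:

```python
import hist

def muRmuFFullyCorrelatedVariations_byName(hist_in, var_axis="nPolVarSyst"):
    # Same arithmetic as the function above, but the variation axis is looked
    # up by name: a rename elsewhere then raises a KeyError here instead of
    # silently reading the wrong axis by position.
    n_vars = hist_in.axes[var_axis].size  # in-acceptance bins only (no flow)
    out_name = (hist_in.name or "hist") + "_variations"
    out = hist.Hist(*hist_in.axes, name=out_name, storage=hist.storage.Double())[..., 0:1, :]
    vals = hist_in.values(flow=True)
    nominal = (vals[..., 0, 0] + vals[..., 0, 1]) / 2  # Down/Up average = nominal by construction
    for i in range(n_vars):
        out.values(flow=True)[..., 0, 0] += vals[..., i, 0] - nominal
        out.values(flow=True)[..., 0, 1] += vals[..., i, 1] - nominal
    out.values(flow=True)[..., 0, 0] += nominal
    out.values(flow=True)[..., 0, 1] += nominal
    return out
```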


def uncertainty_hist_from_envelope(h, proj_ax, entries):
hdown = hh.syst_min_or_max_env_hist(h, proj_ax, "vars", entries, no_flow=["ptVgen"], do_min=True)