From 0f64787f12190caddb62a87ce23635e39fde6583 Mon Sep 17 00:00:00 2001 From: David Date: Tue, 9 Jul 2024 11:54:18 +0200 Subject: [PATCH] First modifications of PR445 --- .github/workflows/unfolding.yml | 4 +- scripts/combine/pullsAndImpacts.py | 101 ++-- scripts/combine/setupCombine.py | 306 +++++----- scripts/histmakers/mw_with_mu_eta_pt.py | 2 +- scripts/histmakers/mz_wlike_with_mu_eta_pt.py | 5 +- scripts/plotting/controlPlotsHDF5.py | 53 +- scripts/plotting/inclusive_xsec_summary.py | 227 ++++++++ scripts/plotting/plot_angular_coefficients.py | 248 ++++---- scripts/plotting/plot_harmonic_polynomials.py | 137 +++++ scripts/plotting/postfitPlots.py | 2 +- scripts/plotting/response_matrix.py | 212 ++++++- scripts/plotting/unfolding_xsec.py | 541 ++++++++++-------- scripts/utilities/fitresult_pois_to_hist.py | 2 +- utilities/common.py | 4 +- utilities/io_tools/conversion_tools.py | 217 +++++-- utilities/styles/styles.py | 22 +- wremnants/HDF5Writer.py | 85 ++- wremnants/combine_helpers.py | 110 +++- wremnants/theoryAgnostic_tools.py | 4 +- wremnants/unfolding_tools.py | 67 ++- 20 files changed, 1666 insertions(+), 683 deletions(-) create mode 100644 scripts/plotting/inclusive_xsec_summary.py create mode 100644 scripts/plotting/plot_harmonic_polynomials.py diff --git a/.github/workflows/unfolding.yml b/.github/workflows/unfolding.yml index 97b65dc46..527ed2581 100644 --- a/.github/workflows/unfolding.yml +++ b/.github/workflows/unfolding.yml @@ -422,14 +422,14 @@ jobs: # run with a reduced binning if: github.event_name == 'pull_request' || github.event.schedule == '30 5 * * 1-5' run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH --excludeFlow --analysisMode unfolding -j $((NTHREADS)) --maxFiles $((MAX_FILES)) --forceDefaultName -o $WREMNANTS_OUTDIR --axes ptll yll --postfix unfolding --genAxes ptVGen - name: dilepton analysis # run with full binning if: github.event_name != 'pull_request' && github.event.schedule != '30 5 * * 1-5' run: >- - scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH + scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH --excludeFlow --analysisMode unfolding -j $((NTHREADS)) --maxFiles $((MAX_FILES)) --forceDefaultName -o $WREMNANTS_OUTDIR --axes ptll yll --postfix unfolding diff --git a/scripts/combine/pullsAndImpacts.py b/scripts/combine/pullsAndImpacts.py index 60c06d490..955e92e54 100644 --- a/scripts/combine/pullsAndImpacts.py +++ b/scripts/combine/pullsAndImpacts.py @@ -10,14 +10,17 @@ from dash import html from dash.dependencies import Input, Output from utilities import logging -from utilities.io_tools import input_tools, output_tools, combinetf_input +from utilities.io_tools import input_tools, output_tools, combinetf_input, conversion_tools from wremnants import plot_tools import os import re import json +from narf import ioutils from utilities.styles.styles import nuisance_groupings as groupings +logger = logging.child_logger(__name__) + def writeOutput(fig, outfile, extensions=[], postfix=None, args=None, meta_info=None): name, ext = os.path.splitext(outfile) if ext not in extensions: @@ -87,11 +90,11 @@ def plotImpacts(df, impact_title="", pulls=False, normalize=False, oneSidedImpac if impacts and 
include_ref: # append numerical values of impacts on nuisance name; pad with spaces to align the numbers - frmt = "{:0"+str(int(np.log10(max(df[impact_str])))+2)+".2f}" + frmt = "{:0"+str(int(np.log10(max(df[impact_str])) if max(df[impact_str])>0 else 0)+2)+".2f}" nval = df[impact_str].apply(lambda x,frmt=frmt: frmt.format(x)) #.astype(str) nspace = nval.apply(lambda x, n=nval.apply(len).max(): " "*(n - len(x))) if include_ref: - frmt_ref = "{:0"+str(int(np.log10(max(df[f"{impact_str}_ref"])))+2)+".2f}" + frmt_ref = "{:0"+str(int(np.log10(max(df[f"{impact_str}_ref"])) if max(df[f"{impact_str}_ref"])>0 else 0)+2)+".2f}" nval_ref = df[f'{impact_str}_ref'].apply(lambda x,frmt=frmt_ref: " ("+frmt.format(x)+")") #.round(2).astype(str) nspace_ref = nval_ref.apply(lambda x, n=nval_ref.apply(len).max(): " "*(n - len(x))) nval = nval+nspace_ref+nval_ref @@ -256,16 +259,17 @@ def plotImpacts(df, impact_title="", pulls=False, normalize=False, oneSidedImpac return fig -def readFitInfoFromFile(rf, filename, poi, group=False, stat=0.0, normalize=False, scale=1): - impacts, labels, _ = combinetf_input.read_impacts_poi(rf, group, add_total=group, stat=stat, poi=poi, normalize=normalize) - - if (group and grouping) or args.filters: +def readFitInfoFromFile(rf, filename, poi, group=False, grouping=None, filters=[], stat=0.0, normalize=False, scale=1): + logger.debug("Read impacts for POI from file") + impacts, labels, norm = combinetf_input.read_impacts_poi(rf, group, add_total=group, stat=stat, poi=poi, normalize=normalize) + + if (group and grouping) or filters: filtimpacts = [] filtlabels = [] for impact,label in zip(impacts,labels): if group and grouping and label not in grouping: continue - if args.filters and not any(re.match(f, label) for f in args.filters): + if filters and not any(re.match(f, label) for f in filters): continue filtimpacts.append(impact) filtlabels.append(label) @@ -293,8 +297,9 @@ def readFitInfoFromFile(rf, filename, poi, group=False, stat=0.0, normalize=Fals def parseArgs(): sort_choices = ["label", "abspull", "constraint", "absimpact"] sort_choices += [ - *[f"{c}_diff" for c in sort_choices], # possibility to sort based on the largest difference between input and reference file - *[f"{c}_ref" for c in sort_choices] ] # possibility to sort based on the reference file + *[f"{c}_diff" for c in sort_choices], # possibility to sort based on the largest difference between input and reference file + *[f"{c}_ref" for c in sort_choices], # possibility to sort based on the reference file + *[f"{c}_both" for c in sort_choices] ] # possibility to sort based on the largest/smallest of both input and reference file parser = argparse.ArgumentParser() parser.add_argument("-f", "--inputFile", type=str, required=True, help="fitresults output ROOT/hdf5 file from combinetf") @@ -311,6 +316,8 @@ def parseArgs(): parser.add_argument("--grouping", type=str, default=None, help="Select nuisances by a predefined grouping", choices=groupings.keys()) parser.add_argument("-t","--translate", type=str, default=None, help="Specify .json file to translate labels") parser.add_argument("--noImpacts", action='store_true', help="Don't show impacts") + parser.add_argument("--poi", type=str, default=None, help="Specify POI to make impacts for, otherwise use all") + parser.add_argument("--poiType", type=str, default=None, help="POI type to make impacts for") parsers = parser.add_subparsers(dest='output_mode') interactive = parsers.add_parser("interactive", help="Launch an interactive dash session")
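(Aside on the zero-padded format strings patched above: a minimal standalone sketch of the alignment logic, with an illustrative helper name that is not part of the script.)

import numpy as np
import pandas as pd

def aligned_labels(values):
    # width of the integer part of the largest value plus two, guarding
    # against log10(0) when all impacts vanish (as in the patched lines)
    width = (int(np.log10(max(values))) if max(values) > 0 else 0) + 2
    frmt = "{:0" + str(width) + ".2f}"
    nval = pd.Series(values).apply(frmt.format)
    # prepend spaces so every label ends up with the same length
    n = nval.apply(len).max()
    return nval.apply(lambda x: " " * (n - len(x)) + x)

print(aligned_labels([0.5, 12.3, 4.0]).tolist())  # [' 0.50', '12.30', ' 4.00']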
interactive.add_argument("-i", "--interface", default="localhost", help="The network interface to bind to.") @@ -335,7 +342,7 @@ def parseArgs(): [Input("groups", "on")], ) -def producePlots(fitresult, args, poi, group=False, normalize=False, fitresult_ref=None): +def producePlots(fitresult, args, poi, group=False, normalize=False, fitresult_ref=None, grouping=None): poi_type = poi.split("_")[-1] if poi else None if poi is not None and "MeV" in poi: @@ -354,53 +361,58 @@ def producePlots(fitresult, args, poi, group=False, normalize=False, fitresult_r impact_title = "$\\mathrm{Impact\\ on\\ mass\\ diff. }(\\eta)\\ (\\mathrm{MeV})$" else: impact_title = "Impact on mass diff. (MeV)" - elif poi and poi.startswith("Width"): + elif poi and poi.startswith("width"): impact_title = "Impact on width (MeV)" + elif poi_type in ["pmaskedexp", "pmaskedexpnorm", "sumxsec", "sumxsecnorm"]: + if poi_type in ["pmaskedexp", "sumxsec"]: + meta = ioutils.pickle_load_h5py(fitresult["meta"]) + channel_info = conversion_tools.combine_channels(meta, True) + if len(channel_info.keys()) == 1: + lumi = channel_info["chan_13TeV"]["lumi"] + else: + raise NotImplementedError(f"Found channels {[k for k in channel_info.keys()]} but only one channel is supported.") + scale = 1./(lumi*1000) + poi_name = "_".join(poi.split("_")[:-1]) + impact_title = "$\\sigma_\\mathrm{fid}("+poi_name+") [\\mathrm{pb}]$" + else: + impact_title = "$1/\\sigma_\\mathrm{fid} \\mathrm{d}\\sigma$" + elif poi_type in ["ratiometaratio"]: + poi_name = "_".join(poi.split("_")[:-1]).replace("r_","") + impact_title = f"Impact on ratio {poi_name} *1000" + scale=1000 else: impact_title=poi if not (group and args.output_mode == 'output'): df = readFitInfoFromFile(fitresult, args.inputFile, poi, False, stat=args.stat/100., normalize=normalize, scale=scale) elif group: - df = readFitInfoFromFile(fitresult, args.inputFile, poi, True, stat=args.stat/100., normalize=normalize, scale=scale) + df = readFitInfoFromFile(fitresult, args.inputFile, poi, True, stat=args.stat/100., normalize=normalize, scale=scale, grouping=grouping) if fitresult_ref: - df_ref = readFitInfoFromFile(fitresult_ref, args.referenceFile, poi, group, stat=args.stat/100., normalize=normalize, scale=scale) - df = df.merge(df_ref, how="left", on="label", suffixes=("","_ref")) + df_ref = readFitInfoFromFile(fitresult_ref, args.referenceFile, poi, group, stat=args.stat/100., normalize=normalize, scale=scale, grouping=grouping) + df = df.merge(df_ref, how="outer", on="label", suffixes=("","_ref")) - if group and fitresult_ref and set(fitresult_ref["hsysts"]) != set(fitresult["hsysts"]): - # add another group for the uncertainty from all systematics that are not common among the two groups; - # defined as err = sqrt( variance(total) - variance(common nuisances) ) - np_common = [s in fitresult_ref["hsysts"][...] for s in fitresult["hsysts"][...]] - np_common_ref = [s in fitresult["hsysts"][...] 
for s in fitresult_ref["hsysts"][...]] - cov = fitresult["cov"][np_common, :][:, np_common] - cov_ref = fitresult_ref["cov"][np_common_ref, :][:, np_common_ref] - - jac = fitresult["nuisance_impact_nois"][:,np_common] - jac_ref = fitresult_ref["nuisance_impact_nois"][:,np_common_ref] - - df_total = df.loc[df["label"] == "Total"] - impact_others = np.sqrt(df_total["impact"].values[0]**2 - scale**2 * (jac @ (cov @ jac.T))[0][0]) - impact_others_ref = np.sqrt(df_total["impact_ref"].values[0]**2 - scale**2 * (jac_ref @ (cov_ref @ jac_ref.T))[0][0]) - - new_row = {"label": "Others", - "impact": impact_others, "absimpact": abs(impact_others), "impact_color": df_total["impact_color"].values[0], - "impact_ref": impact_others_ref, "absimpact_ref": abs(impact_others_ref), "impact_color_ref": df_total["impact_color"].values[0], - } - - df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True) - # remove rows with nuisance groups that are only included in one group but not in the other, since those are included in "Others" already - df = df.dropna() + # Set default values for missing entries in respective columns + default_values = {'impact_color': "#377eb8", 'impact_color_ref': "#377eb8"} + for col in df.columns: + df[col].fillna(default_values.get(col, 0), inplace=True) if args.sort: if args.sort.endswith("diff"): + logger.debug("Sort impacts") key = args.sort.replace("_diff","") df[f"{key}_diff"] = df[key] - df[f"{key}_ref"] + elif args.sort.endswith("both"): + key = args.sort.replace("_both","") + if args.ascending: + df[f"{key}_both"] = df[[key, f"{key}_ref"]].max(axis=1) + else: + df[f"{key}_both"] = df[[key, f"{key}_ref"]].min(axis=1) df = df.sort_values(by=args.sort, ascending=args.ascending) df = df.fillna(0) - + logger.debug("Make plots") if args.output_mode == "interactive": app.layout = html.Div([ dcc.Input( @@ -492,9 +504,16 @@ def producePlots(fitresult, args, poi, group=False, normalize=False, fitresult_r producePlots(fitresult, args, None, fitresult_ref=fitresult_ref) exit() - pois = combinetf_input.get_poi_names(fitresult, poi_type=None) + if args.poi: + pois = [args.poi] + else: + pois = combinetf_input.get_poi_names(fitresult, poi_type=args.poiType) + for poi in pois: + logger.debug(f"Now at {poi}") if args.mode in ["both", "ungrouped"]: + logger.debug("Make impacts per nuisance") producePlots(fitresult, args, poi, fitresult_ref=fitresult_ref) if args.mode in ["both", "group"]: - producePlots(fitresult, args, poi, group=True, fitresult_ref=fitresult_ref) + logger.debug("Make impacts by group") + producePlots(fitresult, args, poi, group=True, fitresult_ref=fitresult_ref, grouping=grouping) diff --git a/scripts/combine/setupCombine.py b/scripts/combine/setupCombine.py index a62af881c..c9c5fee49 100644 --- a/scripts/combine/setupCombine.py +++ b/scripts/combine/setupCombine.py @@ -25,7 +25,7 @@ def make_subparsers(parser): parser.add_argument("--poiAsNoi", action='store_true', help="Make histogram to do the POIs as NOIs trick (some postprocessing will happen later in CardTool.py)") parser.add_argument("--forceRecoChargeAsGen", action="store_true", help="Force gen charge to match reco charge in CardTool, this only works when the reco charge is used to define the channel") - parser.add_argument("--genAxes", type=str, default=None, nargs="+", help="Specify which gen axes should be used in unfolding/theory agnostic, if 'None', use all (inferred from metadata).") + parser.add_argument("--genAxes", type=str, default=[], nargs="+", help="Specify which gen axes should be used in
unfolding/theory agnostic, if 'None', use all (inferred from metadata).") parser.add_argument("--priorNormXsec", type=float, default=1, help="Prior for shape uncertainties on cross sections for theory agnostic or unfolding analysis with POIs as NOIs (1 means 100\%). If negative, it will use shapeNoConstraint in the fit") parser.add_argument("--scaleNormXsecHistYields", type=float, default=None, help="Scale yields of histogram with cross sections variations for theory agnostic analysis with POIs as NOIs. Can be used together with --priorNormXsec") @@ -35,6 +35,9 @@ def make_subparsers(parser): elif subparserName == "theoryAgnosticPolVar": parser.add_argument("--noPolVarOnFake", action="store_true", help="Do not propagate POI variations to fakes") parser.add_argument("--symmetrizePolVar", action='store_true', help="Symmetrize up/Down variations in CardTool (using average)") + elif "unfolding" in subparserName: + parser = common.set_parser_default(parser, "massVariation", 10) + parser = common.set_parser_default(parser, "hdf5", True) return parser @@ -82,8 +85,12 @@ def make_parser(parser=None): parser.add_argument("--forceGlobalScaleFakes", default=None, type=float, help="Scale the fakes by this factor (overriding any custom one implemented in datagroups.py in the fakeSelector).") parser.add_argument("--smoothingOrderFakerate", type=int, default=2, help="Order of the polynomial for the smoothing of the fake rate ") parser.add_argument("--simultaneousABCD", action="store_true", help="Produce datacard for simultaneous fit of ABCD regions") + parser.add_argument("--skipSumGroups", action="store_true", help="Don't add sum groups to the output to save time e.g. when computing impacts") + parser.add_argument("--allowNegativeExpectation", action="store_true", help="Allow processes to have negative contributions") # settings on the nuisances itself parser.add_argument("--doStatOnly", action="store_true", default=False, help="Set up fit to get stat-only uncertainty (currently combinetf with -S 0 doesn't work)") + parser.add_argument("--noTheoryUnc", action="store_true", default=False, help="Set up fit without theory uncertainties") + parser.add_argument("--explicitSignalMCstat", action='store_true', help="Use explicit parameters for signal MC stat uncertainty. Introduces one nuisance parameter per reco bin. 
When unfolding, correlate bin by bin statistical uncertainty in reco bins with masked channel.") parser.add_argument("--minnloScaleUnc", choices=["byHelicityPt", "byHelicityPtCharge", "byHelicityCharge", "byPtCharge", "byPt", "byCharge", "integrated", "none"], default="byHelicityPt", help="Decorrelation for QCDscale") parser.add_argument("--resumUnc", default="tnp", type=str, choices=["scale", "tnp", "tnp_minnlo", "minnlo", "none"], help="Include SCETlib uncertainties") @@ -135,7 +142,7 @@ def make_parser(parser=None): return parser -def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, xnorm=False): +def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, genvar=None, xnorm=False): isUnfolding = args.analysisMode == "unfolding" isTheoryAgnostic = args.analysisMode in ["theoryAgnosticNormVar", "theoryAgnosticPolVar"] @@ -228,7 +235,7 @@ def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, xnorm=False): if isPoiAsNoi: constrainMass = False if isTheoryAgnostic else True - poi_axes = datagroups.gen_axes_names if args.genAxes is None else args.genAxes + poi_axes = datagroups.gen_axes_names if genvar is None else genvar # remove specified gen axes from set of gen axes in datagroups so that those are integrated over datagroups.setGenAxes(sum_gen_axes=[a for a in datagroups.gen_axes_names if a not in poi_axes]) @@ -254,8 +261,8 @@ def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, xnorm=False): datagroups.deleteGroup(f"{base_group}OOA") # remove out of acceptance signal elif isUnfolding or isTheoryAgnostic: constrainMass = False if isTheoryAgnostic else True - datagroups.setGenAxes(args.genAxes) - logger.info(f"GEN axes are {args.genAxes}") + datagroups.setGenAxes(genvar) + logger.info(f"GEN axes are {genvar}") if wmass and "qGen" in datagroups.gen_axes_names: # gen level bins, split by charge if "minus" in args.recoCharge: @@ -321,7 +328,7 @@ def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, xnorm=False): cardTool.setNominalName(inputBaseName) # define sumGroups for integrated cross section - if isFloatingPOIs: + if isFloatingPOIs and not args.skipSumGroups: # TODO: make this less hardcoded to filter the charge (if the charge is not present this will duplicate things) if isTheoryAgnostic and wmass and "qGen" in datagroups.gen_axes: if "plus" in args.recoCharge: @@ -352,11 +359,11 @@ def setup(args, inputFile, inputBaseName, inputLumiScale, fitvar, xnorm=False): pseudodataGroups.set_rebin_action(fitvar, args.axlim, args.rebin, args.absval, rename=False) if wmass and not xnorm: - pseudodataGroups.fakerate_axes=args.fakerateAxes - pseudodataGroups.set_histselectors(pseudodataGroups.getNames(), inputBaseName, mode=args.fakeEstimation, - smoothen=not args.binnedFakeEstimation, smoothingOrderFakerate=args.smoothingOrderFakerate, - integrate_x="mt" not in fitvar, - simultaneousABCD=simultaneousABCD, forceGlobalScaleFakes=args.forceGlobalScaleFakes) + pseudodataGroups.fakerate_axes=args.fakerateAxes + pseudodataGroups.set_histselectors(pseudodataGroups.getNames(), inputBaseName, mode=args.fakeEstimation, + smoothen=not args.binnedFakeEstimation, smoothingOrderFakerate=args.smoothingOrderFakerate, + integrate_x="mt" not in fitvar, + simultaneousABCD=simultaneousABCD, forceGlobalScaleFakes=args.forceGlobalScaleFakes) cardTool.setPseudodataDatagroups(pseudodataGroups) if args.pseudoDataFakes: @@ -516,79 +523,7 @@ def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]): systAxes=["massShift"], passToFakes=passSystToFakes, ) - if 
args.fitMassDiff == "charge": - cardTool.addSystematic(**mass_diff_args, - # # on gen level based on the sample, only possible for mW - # preOpMap={m.name: (lambda h, swap=swap_bins: swap(h, "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown")) - # for g in cardTool.procGroups[signal_samples_forMass[0]] for m in cardTool.datagroups.groups[g].members if "minus" in m.name}, - # on reco level based on reco charge - preOpMap={m.name: (lambda h: - hh.swap_histogram_bins(h, "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown", "charge", 0) - ) for g in cardTool.procGroups[signal_samples_forMass[0]] for m in cardTool.datagroups.groups[g].members}, - ) - elif args.fitMassDiff == "cosThetaStarll": - cardTool.addSystematic(**mass_diff_args, - preOpMap={m.name: (lambda h: - hh.swap_histogram_bins(h, "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown", "cosThetaStarll", hist.tag.Slicer()[0:complex(0,0):]) - ) for g in cardTool.procGroups[signal_samples_forMass[0]] for m in cardTool.datagroups.groups[g].members}, - ) - elif args.fitMassDiff == "eta-sign": - cardTool.addSystematic(**mass_diff_args, - preOpMap={m.name: (lambda h: - hh.swap_histogram_bins(h, "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown", "eta", hist.tag.Slicer()[0:complex(0,0):]) - ) for g in cardTool.procGroups[signal_samples_forMass[0]] for m in cardTool.datagroups.groups[g].members}, - ) - elif args.fitMassDiff == "eta-range": - cardTool.addSystematic(**mass_diff_args, - preOpMap={m.name: (lambda h: - hh.swap_histogram_bins(h, "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown", "eta", hist.tag.Slicer()[complex(0,-0.9):complex(0,0.9):]) - ) for g in cardTool.procGroups[signal_samples_forMass[0]] for m in cardTool.datagroups.groups[g].members}, - ) - elif args.fitMassDiff.startswith("etaRegion"): - # 3 bins, use 3 unconstrained parameters: mass; mass0 - mass2; mass0 + mass2 - mass1 - mass_diff_args["rename"] = f"massDiff1{suffix}{label}" - mass_diff_args["systNameReplace"] = [("Shift",f"Diff1{suffix}")] - cardTool.addSystematic(**mass_diff_args, - preOpMap={m.name: (lambda h: hh.swap_histogram_bins( - hh.swap_histogram_bins(h, "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown", args.fitMassDiff, 2), # invert for mass2 - "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown", args.fitMassDiff, 1, axis1_replace=f"massShift{label}0MeV") # set mass1 to nominal - ) for g in cardTool.procGroups[signal_samples_forMass[0]] for m in cardTool.datagroups.groups[g].members}, - ) - mass_diff_args["rename"] = f"massDiff2{suffix}{label}" - mass_diff_args["systNameReplace"] = [("Shift",f"Diff2{suffix}")] - cardTool.addSystematic(**mass_diff_args, - preOpMap={m.name: (lambda h: - hh.swap_histogram_bins(h, "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown", args.fitMassDiff, 1) - ) for g in cardTool.procGroups[signal_samples_forMass[0]] for m in cardTool.datagroups.groups[g].members}, - ) - - if cardTool.getFakeName() != "QCD" and cardTool.getFakeName() in datagroups.groups.keys() and not xnorm and (not args.binnedFakeEstimation or (args.fakeEstimation in ["extrapolate"] and "mt" in fitvar)): - syst_axes = ["eta", "charge"] if (not args.binnedFakeEstimation or args.fakeEstimation not in ["extrapolate"]) else ["eta", "pt", "charge"] - info=dict( - name=inputBaseName, - group="Fake", - processes=cardTool.getFakeName(), - noConstraint=False, - mirror=False, - scale=1, - applySelection=False, # don't apply 
selection, all regions will be needed for the action - action=cardTool.datagroups.groups[cardTool.getFakeName()].histselector.get_hist, - systAxes=[f"_{x}" for x in syst_axes if x in args.fakerateAxes]+["_param", "downUpVar"]) - subgroup = f"{cardTool.getFakeName()}Rate" - cardTool.addSystematic(**info, - rename=subgroup, - splitGroup = {subgroup: f".*", "experiment": ".*"}, - systNamePrepend=subgroup, - actionArgs=dict(variations_frf=True), - ) - if args.fakeEstimation in ["extended2D",]: - subgroup = f"{cardTool.getFakeName()}Shape" - cardTool.addSystematic(**info, - rename=subgroup, - splitGroup = {subgroup: f".*", "experiment": ".*"}, - systNamePrepend=subgroup, - actionArgs=dict(variations_scf=True), - ) + combine_helpers.add_mass_diff_variations(cardTool, mass_diff_args) # this appears within doStatOnly because technically these nuisances should be part of it if isPoiAsNoi: @@ -655,91 +590,108 @@ def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]): ) muRmuFPolVar_helper.add_theoryAgnostic_uncertainty() + if args.explicitSignalMCstat: + if not xnorm: + recovar = fitvar + else: + # need to find the reco variables that correspond to the masked channel + idx = args.inputFile.index(inputFile) + recovar = args.fitvar[idx].split("-") + + if xnorm and not args.fitresult: + # use variations from reco histogram and apply them to xnorm + source = ("nominal", "yieldsUnfolding") + else: + source = None + + combine_helpers.add_explicit_MCstat(cardTool, recovar, samples="signal_samples", wmass=wmass, source=source) + if args.doStatOnly: # print a card with only mass weights logger.info("Using option --doStatOnly: the card was created with only the mass nuisance parameter") return cardTool - if wmass and not xnorm: - cardTool.addSystematic(f"massWeightZ", - processes=['single_v_nonsig_samples'], - group="ZmassAndWidth", + if not args.noTheoryUnc: + if wmass and not xnorm: + cardTool.addSystematic(f"massWeightZ", + processes=['single_v_nonsig_samples'], + group="ZmassAndWidth", + splitGroup = {"theory": ".*"}, + skipEntries=massWeightNames(proc="Z", exclude=2.1), + mirror=False, + noConstraint=False, + systAxes=["massShift"], + passToFakes=passSystToFakes, + ) + + # Experimental range + #widthVars = (42, ['widthW2p043GeV', 'widthW2p127GeV']) if wmass else (2.3, ['widthZ2p4929GeV', 'widthZ2p4975GeV']) + # Variation from EW fit (mostly driven by alphas unc.)
+ cardTool.addSystematic("widthWeightZ", + rename="WidthZ0p8MeV", + processes= ['single_v_nonsig_samples'] if wmass else signal_samples_forMass, + action=lambda h: h[{"width" : ['widthZ2p49333GeV', 'widthZ2p49493GeV']}], + group="ZmassAndWidth" if wmass else "widthZ", splitGroup = {"theory": ".*"}, - skipEntries=massWeightNames(proc="Z", exclude=2.1), mirror=False, - noConstraint=False, - systAxes=["massShift"], + noi=args.fitWidth if not wmass else False, + noConstraint=args.fitWidth if not wmass else False, + systAxes=["width"], + outNames=["widthZDown", "widthZUp"], passToFakes=passSystToFakes, ) + if wmass: + cardTool.addSystematic("widthWeightW", + rename="WidthW0p6MeV", + processes=signal_samples_forMass, + action=lambda h: h[{"width" : ['widthW2p09053GeV', 'widthW2p09173GeV']}], + group="widthW", + splitGroup = {"theory": ".*"}, + mirror=False, + noi=args.fitWidth, + noConstraint=args.fitWidth, + systAxes=["width"], + outNames=["widthWDown", "widthWUp"], + passToFakes=passSystToFakes, + ) - # Experimental range - #widthVars = (42, ['widthW2p043GeV', 'widthW2p127GeV']) if wmass else (2.3, ['widthZ2p4929GeV', 'widthZ2p4975GeV']) - # Variation from EW fit (mostly driven by alphas unc.) - cardTool.addSystematic("widthWeightZ", - rename="WidthZ0p8MeV", - processes= ['single_v_nonsig_samples'] if wmass else signal_samples_forMass, - action=lambda h: h[{"width" : ['widthZ2p49333GeV', 'widthZ2p49493GeV']}], - group="ZmassAndWidth" if wmass else "widthZ", - splitGroup = {"theory": ".*"}, - mirror=False, - noi=args.fitWidth if not wmass else False, - noConstraint=args.fitWidth if not wmass else False, - systAxes=["width"], - outNames=["widthZDown", "widthZUp"], - passToFakes=passSystToFakes, - ) - if wmass: - cardTool.addSystematic("widthWeightW", - rename="WidthW0p6MeV", - processes=signal_samples_forMass, - action=lambda h: h[{"width" : ['widthW2p09053GeV', 'widthW2p09173GeV']}], - group="widthW", - splitGroup = {"theory": ".*"}, + cardTool.addSystematic(f"sin2thetaWeightZ", + rename=f"Sin2thetaZ0p00003", + processes= ['z_samples'], + action=lambda h: h[{"sin2theta" : ['sin2thetaZ0p23151', 'sin2thetaZ0p23157']}], + group=f"sin2thetaZ", mirror=False, - noi=args.fitWidth, - noConstraint=args.fitWidth, - systAxes=["width"], - outNames=["widthWDown", "widthWUp"], + systAxes=["sin2theta"], + outNames=[f"sin2thetaZDown", f"sin2thetaZUp"], passToFakes=passSystToFakes, ) - cardTool.addSystematic(f"sin2thetaWeightZ", - rename=f"Sin2thetaZ0p00003", - processes= ['z_samples'], - action=lambda h: h[{"sin2theta" : ['sin2thetaZ0p23151', 'sin2thetaZ0p23157']}], - group=f"sin2thetaZ", - mirror=False, - systAxes=["sin2theta"], - outNames=[f"sin2thetaZDown", f"sin2thetaZUp"], - passToFakes=passSystToFakes, - ) - - combine_helpers.add_electroweak_uncertainty(cardTool, [*args.ewUnc, *args.fsrUnc, *args.isrUnc], - samples="single_v_samples", flavor=datagroups.flavor, passSystToFakes=passSystToFakes) - - to_fakes = passSystToFakes and not args.noQCDscaleFakes and not xnorm - - theory_helper = combine_theory_helper.TheoryHelper(cardTool, args, hasNonsigSamples=(wmass and not xnorm)) - theory_helper.configure(resumUnc=args.resumUnc, - transitionUnc = not args.noTransitionUnc, - propagate_to_fakes=to_fakes, - np_model=args.npUnc, - tnp_scale = args.scaleTNP, - mirror_tnp=False, - pdf_from_corr=args.pdfUncFromCorr, - scale_pdf_unc=args.scalePdf, - minnlo_unc=args.minnloScaleUnc, - ) + combine_helpers.add_electroweak_uncertainty(cardTool, [*args.ewUnc, *args.fsrUnc, *args.isrUnc], + samples="single_v_samples", 
flavor=datagroups.flavor, passSystToFakes=passSystToFakes) + + to_fakes = passSystToFakes and not args.noQCDscaleFakes and not xnorm + + theory_helper = combine_theory_helper.TheoryHelper(cardTool, args, hasNonsigSamples=(wmass and not xnorm)) + theory_helper.configure(resumUnc=args.resumUnc, + transitionUnc = not args.noTransitionUnc, + propagate_to_fakes=to_fakes, + np_model=args.npUnc, + tnp_scale = args.scaleTNP, + mirror_tnp=False, + pdf_from_corr=args.pdfUncFromCorr, + scale_pdf_unc=args.scalePdf, + minnlo_unc=args.minnloScaleUnc, + ) - theorySystSamples = ["signal_samples_inctau"] - if wmass: - if args.noPDFandQCDtheorySystOnSignal: - theorySystSamples = ["wtau_samples"] - theorySystSamples.append("single_v_nonsig_samples") - if xnorm: - theorySystSamples = ["signal_samples"] + theorySystSamples = ["signal_samples_inctau"] + if wmass: + if args.noPDFandQCDtheorySystOnSignal: + theorySystSamples = ["wtau_samples"] + theorySystSamples.append("single_v_nonsig_samples") + if xnorm: + theorySystSamples = ["signal_samples"] - theory_helper.add_all_theory_unc(theorySystSamples, skipFromSignal=args.noPDFandQCDtheorySystOnSignal) + theory_helper.add_all_theory_unc(theorySystSamples, skipFromSignal=args.noPDFandQCDtheorySystOnSignal) if xnorm or genfit: return cardTool @@ -766,6 +718,34 @@ def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]): cardTool.addLnNSystematic("CMS_background", processes=["Other"], size=1.15, group="CMS_background", splitGroup = {"experiment": ".*"},) cardTool.addLnNSystematic("lumi", processes=['MCnoQCD'], size=1.017 if lowPU else 1.012, group="luminosity", splitGroup = {"experiment": ".*"},) + if cardTool.getFakeName() != "QCD" and cardTool.getFakeName() in datagroups.groups.keys() and not xnorm and (not args.binnedFakeEstimation or (args.fakeEstimation in ["extrapolate"] and "mt" in fitvar)): + syst_axes = ["eta", "charge"] if (not args.binnedFakeEstimation or args.fakeEstimation not in ["extrapolate"]) else ["eta", "pt", "charge"] + info=dict( + name=inputBaseName, + group="Fake", + processes=cardTool.getFakeName(), + noConstraint=False, + mirror=False, + scale=1, + applySelection=False, # don't apply selection, all regions will be needed for the action + action=cardTool.datagroups.groups[cardTool.getFakeName()].histselector.get_hist, + systAxes=[f"_{x}" for x in syst_axes if x in args.fakerateAxes]+["_param", "downUpVar"]) + subgroup = f"{cardTool.getFakeName()}Rate" + cardTool.addSystematic(**info, + rename=subgroup, + splitGroup = {subgroup: f".*", "experiment": ".*"}, + systNamePrepend=subgroup, + actionArgs=dict(variations_frf=True), + ) + if args.fakeEstimation in ["extended2D",]: + subgroup = f"{cardTool.getFakeName()}Shape" + cardTool.addSystematic(**info, + rename=subgroup, + splitGroup = {subgroup: f".*", "experiment": ".*"}, + systNamePrepend=subgroup, + actionArgs=dict(variations_scf=True), + ) + if not args.noEfficiencyUnc: if not lowPU: @@ -1170,9 +1150,10 @@ def main(args, xnorm=False): checkSysts = forceNonzero fitvar = args.fitvar[0].split("-") if not xnorm else ["count"] + genvar = args.genAxes[0].split("-") if hasattr(args,"genAxes") and len(args.genAxes) else None iBaseName = args.baseName[0] iLumiScale = args.lumiScale[0] - cardTool = setup(args, args.inputFile[0], iBaseName, iLumiScale, fitvar, xnorm) + cardTool = setup(args, args.inputFile[0], iBaseName, iLumiScale, fitvar, genvar, xnorm=xnorm) cardTool.setOutput(outputFolderName(args.outfolder, cardTool, args.doStatOnly, args.postfix), analysis_label(cardTool)) 
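(For readers of main() above: a hedged illustration of the dash-separated per-channel convention followed by --fitvar and --genAxes; the argument values here are invented for the example.)

# one string per input file/channel; axes within a channel are joined by "-"
fitvar_arg = ["pt-eta-charge", "ptll-yll"]
genaxes_arg = ["ptVGen-absYVGen"]

fitvars = [v.split("-") for v in fitvar_arg]
genvar = genaxes_arg[0].split("-") if len(genaxes_arg) else None
print(fitvars)  # [['pt', 'eta', 'charge'], ['ptll', 'yll']]
print(genvar)   # ['ptVGen', 'absYVGen']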
cardTool.writeOutput(args=args, forceNonzero=forceNonzero, check_systs=checkSysts) return @@ -1196,8 +1177,8 @@ def main(args, xnorm=False): raise ValueError("Options unfolding and --fitXsec are incompatible. Please choose one or the other") if isTheoryAgnostic: - if args.genAxes is None: - args.genAxes = ["ptVgenSig", "absYVgenSig", "helicitySig"] + if len(args.genAxes) == 0: + args.genAxes = ["ptVgenSig-absYVgenSig-helicitySig"] logger.warning(f"Automatically setting '--genAxes {' '.join(args.genAxes)}' for theory agnostic analysis") if args.poiAsNoi: logger.warning("This is only needed to properly get the systematic axes") @@ -1210,10 +1191,14 @@ def main(args, xnorm=False): if args.hdf5: writer = HDF5Writer.HDF5Writer(sparse=args.sparse) + if args.baseName == "xnorm": + writer.theoryFit = True + # loop over all files outnames = [] for i, ifile in enumerate(args.inputFile): fitvar = args.fitvar[i].split("-") + genvar = args.genAxes[i].split("-") if hasattr(args,"genAxes") and len(args.genAxes) else None iBaseName = args.baseName[0] if len(args.baseName)==1 else args.baseName[i] iLumiScale = args.lumiScale[0] if len(args.lumiScale)==1 else args.lumiScale[i] @@ -1221,11 +1206,12 @@ def main(args, xnorm=False): outnames.append( (outputFolderName(args.outfolder, cardTool, args.doStatOnly, args.postfix), analysis_label(cardTool)) ) writer.add_channel(cardTool) - if isFloatingPOIs: - cardTool = setup(args, ifile, iBaseName, iLumiScale, ["count"], xnorm=True) + if isFloatingPOIs or isUnfolding: + fitvar = genvar if isPoiAsNoi else ["count"] + cardTool = setup(args, ifile, iBaseName, iLumiScale, fitvar, xnorm=True) writer.add_channel(cardTool) if args.fitresult: - writer.set_fitresult(args.fitresult, mc_stat=not args.noMCStat) + writer.set_fitresult(args.fitresult, mc_stat=not (args.noMCStat or args.explicitSignalMCstat)) if len(outnames) == 1: outfolder, outfile = outnames[0] @@ -1235,7 +1221,7 @@ def main(args, xnorm=False): outfolder = f"{args.outfolder}/Combination_{''.join(unique_names)}{dir_append}/" outfile = "Combination" logger.info(f"Writing HDF5 output to {outfile}") - writer.write(args, outfolder, outfile) + writer.write(args, outfolder, outfile, allowNegativeExpectation=args.allowNegativeExpectation) else: if len(args.inputFile) > 1: raise IOError(f"Multiple input files only supported within --hdf5 mode") diff --git a/scripts/histmakers/mw_with_mu_eta_pt.py b/scripts/histmakers/mw_with_mu_eta_pt.py index 5bca5628c..afeab596f 100644 --- a/scripts/histmakers/mw_with_mu_eta_pt.py +++ b/scripts/histmakers/mw_with_mu_eta_pt.py @@ -467,7 +467,7 @@ def build_graph(df, dataset): df = df.Define("goodMuons_pt", "Muon_correctedPt[goodMuons]") df = df.Define("goodMuons_charge", "Muon_correctedCharge[goodMuons]") df = df.Define(f"goodMuons_{cvhName}NValidPixelHits", f"Muon_{cvhName}NValidPixelHits[goodMuons]") - df = df.Define("goodMuons_triggerCat", "ROOT::VecOps::RVec(goodMuons_eta.size(), wrem::TriggerCat::triggering)"); + df = df.Define("goodMuons_triggerCat", "ROOT::VecOps::RVec(goodMuons_eta.size(), wrem::TriggerCat::triggering)") pixel_multiplicity_cols = ["goodMuons_triggerCat", "goodMuons_eta", "goodMuons_pt", "goodMuons_charge", f"goodMuons_{cvhName}NValidPixelHits"] diff --git a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py index f04342817..83b1561ba 100644 --- a/scripts/histmakers/mz_wlike_with_mu_eta_pt.py +++ b/scripts/histmakers/mz_wlike_with_mu_eta_pt.py @@ -26,13 +26,14 @@ initargs,_ = parser.parse_known_args() logger = 
logging.setup_logger(__file__, initargs.verbose, initargs.noColorLogger) -isUnfolding = initargs.analysisMode == "unfolding" - parser = common.set_parser_default(parser, "aggregateGroups", ["Diboson", "Top", "Wtaunu", "Wmunu"]) parser = common.set_parser_default(parser, "excludeProcs", ["QCD"]) args = parser.parse_args() +isUnfolding = args.analysisMode == "unfolding" +isPoiAsNoi = isUnfolding and args.poiAsNoi + thisAnalysis = ROOT.wrem.AnalysisType.Wlike isoBranch = muon_selections.getIsoBranch(args.isolationDefinition) era = args.era diff --git a/scripts/plotting/controlPlotsHDF5.py b/scripts/plotting/controlPlotsHDF5.py index 44910bfde..d504cc919 100644 --- a/scripts/plotting/controlPlotsHDF5.py +++ b/scripts/plotting/controlPlotsHDF5.py @@ -1,4 +1,5 @@ import mplhep as hep +import matplotlib.pyplot as plt import numpy as np import hist import itertools @@ -28,10 +29,15 @@ parser.add_argument("--noStack", action='store_true', help="Don't plot the individual processes") parser.add_argument("--processes", type=str, nargs='*', default=[], help="Select processes") parser.add_argument("--splitByProcess", action='store_true', help="Make a separate plot for each of the selected processes") -parser.add_argument("--selectionAxes", type=str, default=["charge", "passIso", "passMT"], +parser.add_argument("--selectionAxes", type=str, nargs="*", default=["charge", "passIso", "passMT"], help="List of axes for which a separate plot is created per bin") +parser.add_argument("--select", type=int, nargs="*", default=[], + help="Select specific bins of the selection axes, e.g. '0 1' to select the first bin of the first axis and the second bin of the second axis") +parser.add_argument("--hists", type=str, nargs='*', default=None, + help="List of hists to plot; dash separated for unrolled hists") + # variations -parser.add_argument("--varName", type=str, nargs='+', required=True, help="Name of variation hist") +parser.add_argument("--varName", type=str, nargs='*', default=[], help="Name of variation hist") parser.add_argument("--varLabel", type=str, nargs='*', default=[], help="Label(s) of variation hist for plotting") parser.add_argument("--varColor", type=str, nargs='*', default=[], help="Variation colors") @@ -75,12 +81,19 @@ def make_plots(hists_proc, hist_data, *opts, **info): for axes_names in axes_combinations: if isinstance(axes_names, str): axes_names = [axes_names] + + if args.hists: + if not any(set(axes_names) == set(h.split("-")) for h in args.hists): + continue + logger.info(f"Make plot(s) with axes {axes_names}") make_plot(hists_proc, hist_data, axes_names=axes_names, *opts, **info) -def make_plot(hists_proc, hist_data, hists_syst_up, hists_syst_dn, axes_names, suffix="", channel="", colors=[], labels=[], procs=[], rlabel="1/Pred.", density=False, legtext_size=20): +def make_plot(hists_proc, hist_data, hists_syst_up, hists_syst_dn, axes_names, + selections=None, selection_edges=None, channel="", colors=[], labels=[], procs=[], rlabel="1/Pred.", density=False, legtext_size=20 +): if any(x in axes_names for x in ["ptll", "mll", "ptVgen", "ptVGen"]): # in case of variable bin width normalize to unit binwnorm = 1.0 @@ -234,6 +247,20 @@ def make_plot(hists_proc, hist_data, hists_syst_up, hists_syst_dn, axes_names, s ax=ax2 ) + scale = max(1, np.divide(*ax1.get_figure().get_size_inches())*0.3) + + if selections is not None: + for i, (key, idx) in enumerate(selections.items()): + lo, hi = selection_edges[i] + label = styles.xlabels[key].replace(" (GeV)","") + if lo is not None: + label = f"{lo} < {label}" +
if hi is not None: + label = f"{label} < {hi}" + + ax1.text(0.05, 0.96-i*0.08, label, horizontalalignment='left', verticalalignment='top', transform=ax1.transAxes, + fontsize=20*args.scaleleg*scale) + plot_tools.addLegend(ax1, 2, text_size=legtext_size) if add_ratio: plot_tools.fix_axes(ax1, ax2, yscale=args.yscale, logy=args.logy) @@ -248,7 +275,7 @@ def make_plot(hists_proc, hist_data, hists_syst_up, hists_syst_dn, axes_names, s if args.cmsDecor: lumi = float(f"{channel_info['lumi']:.3g}") if not density else None - scale = max(1, np.divide(*ax1.get_figure().get_size_inches())*0.3) + hep.cms.label(ax=ax1, lumi=lumi, fontsize=legtext_size*scale, label= args.cmsDecor, data=hist_data is not None) @@ -259,8 +286,8 @@ def make_plot(hists_proc, hist_data, hists_syst_up, hists_syst_dn, axes_names, s outfile += f"{procs[i]}_" outfile += "_".join(axes_names) outfile += f"_{channel}" - if suffix: - outfile += f"_{suffix}" + if selections is not None: + outfile += "_" + "_".join([f"{a}{i}" for a, i in selections.items()]) if args.postfix: outfile += f"_{args.postfix}" plot_tools.save_pdf_and_png(outdir, outfile) @@ -315,13 +342,18 @@ def make_plot(hists_proc, hist_data, hists_syst_up, hists_syst_dn, axes_names, s info = dict(channel=channel, labels=labels,colors=colors, procs=procs) # make plots in slices (e.g. for charge plus and minus separately) - selection_axes = [a for a in hists_proc[0].axes if a.name in args.selectionAxes] + selection_axes = [hists_proc[0].axes[n] for n in args.selectionAxes if n in hists_proc[0].axes.name] if len(selection_axes) > 0: - selection_bins = [np.arange(a.size) for a in hists_proc[0].axes if a.name in args.selectionAxes] other_axes = [a for a in hists_proc[0].axes if a not in selection_axes] + if len(args.select): + bin_combinations = [args.select] + else: + selection_bins = [np.arange(a.size) for a in hists_proc[0].axes if a.name in args.selectionAxes] + bin_combinations = itertools.product(*selection_bins) - for bins in itertools.product(*selection_bins): + for bins in bin_combinations: idxs = {a.name: i for a, i in zip(selection_axes, bins) } + selection_edges = [(a.edges[i], a.edges[i+1] if len(a.edges)-1>i else None) for a, i in zip(selection_axes, bins)] hs_proc = [h[idxs] for h in hists_proc] @@ -334,8 +366,7 @@ def make_plot(hists_proc, hist_data, hists_syst_up, hists_syst_dn, axes_names, s hs_syst_dn = [h[idxs] for h in hists_syst_dn] hs_syst_up = [h[idxs] for h in hists_syst_up] - suffix = "_".join([f"{a}{i}" for a, i in idxs.items()]) - make_plots(hs_proc, h_data, hs_syst_dn, hs_syst_up, suffix=suffix, **info) + make_plots(hs_proc, h_data, hs_syst_dn, hs_syst_up, selections=idxs, selection_edges=selection_edges, **info) else: make_plots(hists_proc, hist_data, hists_syst_dn, hists_syst_up, **info) diff --git a/scripts/plotting/inclusive_xsec_summary.py b/scripts/plotting/inclusive_xsec_summary.py new file mode 100644 index 000000000..08d76c2c3 --- /dev/null +++ b/scripts/plotting/inclusive_xsec_summary.py @@ -0,0 +1,227 @@ +# From the results of fiducial inclusive cross sections and their ratios, +# make a summary plot with different theory predictions +# and a latex summary table with the breakdown of uncertainties + +from wremnants import plot_tools + +from utilities import common, logging +from utilities.io_tools import output_tools, combinetf_input, conversion_tools, tex_tools + +from utilities.styles.styles import nuisance_groupings + +from narf import ioutils + +import math + +import numpy as np +import pandas as pd +import json + +import matplotlib.pyplot as
plt +import matplotlib.patches as mpatches +import mplhep as hep + +import pdb + +parser = common.plot_parser() +parser.add_argument("infile", type=str, help="Combine fitresult file") +parser.add_argument("-t","--translate", type=str, default=None, help="Specify .json file to translate labels") + +args = parser.parse_args() + +logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger) + +if args.infile.endswith(".root"): + args.infile = args.infile.replace(".root", ".hdf5") + +grouping = nuisance_groupings["max"] + +fitresult = combinetf_input.get_fitresult(args.infile) +meta = ioutils.pickle_load_h5py(fitresult["meta"]) + +translate_label = {} +if args.translate: + with open(args.translate) as f: + translate_label = json.load(f) + +pois = ["W_qGen0_sumxsec", "W_qGen1_sumxsec", "W_sumxsec", "Z_sumxsec", "r_qGen_W__ratiometaratio", "r_WZ_ratiometaratio"] +poi_names = ["$\mathrm{W}^{-}$", "$\mathrm{W}^{+}$", "$\mathrm{W}$", "$\mathrm{Z}$", "$\mathrm{W}^{+}/\mathrm{W}^{-}$", "$\mathrm{W/Z}$"] + +pois = ["W_qGen0_sumxsec", "W_qGen1_sumxsec", "W_sumxsec", "r_qGen_W_ratiometaratio",] +poi_names = ["$\mathrm{W}^{-}$", "$\mathrm{W}^{+}$", "$\mathrm{W}$", "$\mathrm{W}^{+}/\mathrm{W}^{-}$",] + + +combine = { + "binByBinStat": "binByBinStat", + "statMC": "binByBinStat", + "ZmassAndWidth": "massAndWidth", + "massShift": "massAndWidth", + "QCDscale": "QCDscale", + "bcQuarkMass": "QCDscale", +} +combine = {} + +dfs = [] +for poi, poi_name in zip(pois, poi_names): + + if poi.endswith("sumxsec"): + channel_info = conversion_tools.combine_channels(meta, True) + if len(channel_info.keys()) == 1: + lumi = channel_info["chan_13TeV"]["lumi"] + else: + raise NotImplementedError(f"Found channels {[k for k in channel_info.keys()]} but only one channel is supported.") + scale = 1./(lumi*1000) + elif poi.endswith("ratio"): + scale=1.0 + + impacts, labels, norm = combinetf_input.read_impacts_poi(fitresult, True, add_total=True, stat=0.0, poi=poi, normalize=False) + + filtimpacts = [] + filtlabels = [] + for impact, label in zip(impacts,labels): + if label not in grouping: + continue + if label in combine: + label = combine[label] + if label in filtlabels: + idx = filtlabels.index(label) + filtimpacts[idx] = (filtimpacts[idx]**2 + impact**2)**0.5 + continue + + filtimpacts.append(impact) + filtlabels.append(label) + + impacts = filtimpacts + labels = filtlabels + + df = pd.DataFrame(np.array(impacts, dtype=np.float64).T*scale, columns=["impact"]) + + df["label"] = labels + df["systematic"] = df["label"].apply(lambda l: translate_label.get(l, l)) + df["poi_name"] = poi_name + df["norm"] = norm*scale + + dfs.append(df) + +df = pd.concat(dfs) + +outdir = output_tools.make_plot_dir(args.outpath, args.outfolder, eoscp=args.eoscp) + +# make latex table +relative = True # compute relative uncertainty +percentage = True # numbers in percentage + +df_t = df.copy() +if relative: + df_t["impact"] /= df_t["norm"] +if percentage: + df_t["impact"] *= 100 + +# sorting +cat_dtype = pd.CategoricalDtype(categories=poi_names, ordered=True) +df_t['poi_name'] = df_t['poi_name'].astype(cat_dtype) + + +outname = "summary_table" +if args.postfix: + outname += f"_{args.postfix}" +tex_tools.make_latex_table(df_t, output_dir=outdir, output_name=outname, + column_title=None, + caption="Uncertainties in percentage.", + label="", sublabel="", + column_name="poi_name", row_name="systematic", + cell_columns=["impact"], cell_format=lambda x: f"${round(x,2)}$", sort="impact") + +# make plot +hep.style.use(hep.style.ROOT) + + +plt.clf() 
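(A compact sketch of the quadrature merging performed in the filter loop above when two nuisance groups are mapped onto the same label; the group names and numbers here are invented for illustration.)

import math

combine = {"statMC": "binByBinStat", "massShift": "massAndWidth"}
impacts = {"binByBinStat": 0.3, "statMC": 0.4, "massShift": 0.1}

merged = {}
for label, impact in impacts.items():
    label = combine.get(label, label)
    # impacts of merged groups add in quadrature, assuming they are uncorrelated
    merged[label] = math.hypot(merged.get(label, 0.0), impact)

print(merged)  # {'binByBinStat': 0.5, 'massAndWidth': 0.1}, up to float rounding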
+fig = plt.figure() +fig.subplots_adjust(left=0.15, right=0.99, top=0.99, bottom=0.125) +ax = fig.add_subplot(111) + +# x axis range +lo, hi = 0.94, 1.09 + +# totals = [] +# stats = [] +norms = [] +for i, poi_name in enumerate(poi_names[::-1]): + df_g = df.loc[df["poi_name"] == poi_name] + + norm = df_g["norm"].values[0] + total = df_g.loc[df_g["label"] == "Total"]["impact"].values[0] + stat = df_g.loc[df_g["label"] == "stat"]["impact"].values[0] + total_rel = total / norm + stat_rel = stat / norm + + norms.append(norm) + # totals.append(total) + # stats.append(stat) + + x1 = ax.bar(1.0, height=1, bottom=i, width=2*total_rel, color="silver", label="Total") + x2 = ax.bar(1.0, height=1, bottom=i, width=2*stat_rel, color="gold", label="Stat") + + # round to two significant digits in total uncertainty + sig_digi = 2 - int(math.floor(math.log10(abs(total)))) - 1 + + if sig_digi <= 0: + norm = int(norm) + total = int(total) + else: + norm = round(norm, sig_digi) + total = round(total, sig_digi) + + ax.text(lo+0.01, i+0.5, poi_name, fontsize=20, verticalalignment="bottom", horizontalalignment="left") + title = f"${norm} \pm {total}" + if "/" in poi_name: + title += "$" + else: + title += "\,\mathrm{pb}$" + ax.text(hi-0.05, i+0.5, title, fontsize=20, verticalalignment="bottom", horizontalalignment="left") + +ax.text(hi-0.05, len(poi_names)+0.5, "$\mathrm{Measured} \pm {unc}$", fontsize=20, verticalalignment="bottom", horizontalalignment="left") + +x0 = ax.plot([1.,1.],[0,len(norms)], color="black") +ax.plot([lo, hi], [len(norms),len(norms)], color="black") + + +# make legend + +# Custom legend handles +p2 = mpatches.Rectangle((0, 0), 1, 1, facecolor="gold", edgecolor='gold', linewidth=3), +p1 = mpatches.Rectangle((0, 0), 2, 1, facecolor="silver", edgecolor='silver', linewidth=3), + +leg_styles = [(x2[0], x1[0], x0[0])] +leg_labels = ['Measurement'] +leg = ax.legend(leg_styles, leg_labels, loc="upper left", ncol=len(leg_labels), fontsize=20) + +ax.set_xlim([lo, hi]) +ax.set_ylim([0,len(norms)+1]) + +ax.set_xlabel("1./Measurement", fontsize=20) + +# Disable ticks on the top and right axes +ax.tick_params(top=False) + +# Disable y-axis labels and ticks +plt.gca().set_yticklabels([]) +plt.gca().set_yticks([]) + +scale = max(1, np.divide(*ax.get_figure().get_size_inches())*0.3) +hep.cms.label(ax=ax, lumi=float(f"{args.lumi:.3g}"), fontsize=20*args.scaleleg*scale, + label=args.cmsDecor, data=True) + +outname = "summary" +if args.postfix: + outname += f"_{args.postfix}" +plot_tools.save_pdf_and_png(outdir, outname) + +plot_tools.write_index_and_log(outdir, outname, + analysis_meta_info={"CombinetfOutput" : meta["meta_info"]}, + args=args, +) + +if output_tools.is_eosuser_path(args.outpath) and args.eoscp: + output_tools.copy_to_eos(args.outpath, args.outfolder) diff --git a/scripts/plotting/plot_angular_coefficients.py b/scripts/plotting/plot_angular_coefficients.py index 85cee859d..a73c9b28d 100644 --- a/scripts/plotting/plot_angular_coefficients.py +++ b/scripts/plotting/plot_angular_coefficients.py @@ -1,158 +1,139 @@ -from utilities import logging, common +from utilities import logging, common, boostHistHelpers as hh from utilities.io_tools import input_tools, output_tools from utilities.styles import styles from wremnants import theory_tools, plot_tools -import numpy as np +import h5py import matplotlib as mpl -import hist -import math +import matplotlib.pyplot as plt import mplhep as hep -import h5py - -def a0(theta, phi): - return 1. + np.cos(theta)**2 - -def a1(theta, phi): - return 0.5 * (1. 
- 3. * np.cos(theta)**2) - -def a2(theta, phi): - return np.sin(2*theta) * np.cos(phi) - -def a3(theta, phi): - return 0.5 * np.sin(theta)**2 * np.cos(2*phi) - -def a4(theta, phi): - return np.sin(theta) * np.cos(phi) - -def a5(theta, phi): - return np.cos(theta) - -def a6(theta, phi): - return np.sin(theta)**2 * np.sin(2*phi) - -def a7(theta, phi): - return np.sin(2*theta) * np.sin(phi) - -def a8(theta, phi): - return np.sin(theta) * np.sin(phi) - -def load_moments_coefficients(filename, process="Z"): - with h5py.File(filename, "r") as ff: - out = input_tools.load_results_h5py(ff) - - moments = out[process] - - corrh = theory_tools.moments_to_angular_coeffs(moments) - - if 'muRfact' in corrh.axes.name: - corrh = corrh[{'muRfact' : 1.j,}] - if 'muFfact' in corrh.axes.name: - corrh = corrh[{'muFfact' : 1.j,}] - - axes_names = ['massVgen','absYVgen','ptVgen','chargeVgen', 'helicity'] - if not list(corrh.axes.name) == axes_names: - raise ValueError (f"Axes [{corrh.axes.name}] are not the ones this functions expects ({axes_names})") - - if np.count_nonzero(corrh[{"helicity" : -1.j}] == 0): - logger.warning("Zeros in sigma UL for the angular coefficients will give undefined behaviour!") - # histogram has to be without errors to load the tensor directly - corrh_noerrs = hist.Hist(*corrh.axes, storage=hist.storage.Double()) - corrh_noerrs.values(flow=True)[...] = corrh.values(flow=True) - - return corrh_noerrs +import numpy as np +from scipy.stats import chi2 if __name__ == '__main__': parser = common.plot_parser() - parser.add_argument("infile", help="Moments file `w_z_moments.hdf` with coefficients produced in w_z_gen_dists.py histmaker") + parser.add_argument("helicities", nargs="+", type=str, help="File `w_z_helicity_xsecs.hdf` with helicity cross sections produced in w_z_gen_dists.py histmaker") + parser.add_argument("--labels", nargs="+", type=str, help="Labels for the different input files") + parser.add_argument("--process", default="Z", choices=["Z", "W"], help="Process to be plotted") + parser.add_argument("--plotXsecs", action="store_true", help="Plot helicity cross sections instead of angular coefficients") args = parser.parse_args() logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger) - if args.infile is not None: - raise NotImplementedError("Using moments file from histmaker output is not yet supported") - # hcoeff = load_moments_coefficients(args.infile) - outdir = output_tools.make_plot_dir(args.outpath, args.outfolder, eoscp=args.eoscp) - colors = mpl.colormaps["tab10"] - linestyles = ["solid","dotted","dashed","dashdot"] + plot_coefficients = not args.plotXsecs + + helicities = [] + helicity_sum = None + weight_sum_all=0 + xsec=0 + for helicity_file in args.helicities: + with h5py.File(helicity_file, "r") as ff: + out = input_tools.load_results_h5py(ff) + hhelicity = out[args.process] + hhelicity = hhelicity[{"muRfact":1j, "muFfact":1j, "chargeVgen":0, "massVgen":0}] + hhelicity = hh.disableFlow(hhelicity, "absYVGen") + + weight_sum=0 + with h5py.File(helicity_file.replace("helicity_xsecs","gen_dists"), "r") as ff: + out = input_tools.load_results_h5py(ff) + for key in out.keys(): + if key.startswith(args.process) and key not in ["meta_info"]: + xsec = out[key]["dataset"]["xsec"] + weight_sum = out[key]["weight_sum"] + weight_sum_all += weight_sum + + if len(args.helicities)>1: + if helicity_sum is None: + helicity_sum = hhelicity.copy() + else: + helicity_sum = hh.addHists(helicity_sum, hhelicity) - npoints = 100 + hhelicity = hh.scaleHist(hhelicity, 
xsec/weight_sum) + helicities.append(hhelicity) - theta_edges = np.linspace(-np.pi, 0, npoints+1) - cosTheta_edges = np.cos(theta_edges) - theta = theta_edges[:-1] + np.diff(theta_edges)/2. - cosTheta = cosTheta_edges[:-1] + np.diff(cosTheta_edges)/2. + if helicity_sum is not None: + # under the assumption that xsec is the same for all processes + helicity_sum = hh.scaleHist(helicity_sum, xsec/weight_sum_all) - phi = np.linspace(-np.pi, np.pi, npoints) - x, y = np.meshgrid(theta, phi) - - histos = [] - for ai in [a0, a1, a2, a3, a4, a5, a6, a7, a8]: - histo = hist.Hist( - hist.axis.Variable(cosTheta_edges, name = "cosTheta", underflow=False, overflow=False), - hist.axis.Regular(npoints, -math.pi, math.pi, circular = True, name = "phi"), - storage=hist.storage.Double() - ) + if len(helicities) == 2: + chi2_stat = np.sum( (helicities[0].values(flow=True)-helicities[1].values(flow=True))**2/(helicities[0].variances(flow=True)+helicities[1].variances(flow=True)) ) + dof = len(helicities[0].values(flow=True).flatten()) - 1 + p_val = chi2.sf(chi2_stat, dof) - histo.values()[...] = ai(x,y).T + logger.info(f"Chi2/ndf = {chi2_stat} with p-value = {p_val}") - histos.append(histo) + colors = mpl.colormaps["tab10"] - scales = [0., 20./3., 5., 20., 4., 4., 5., 5., 4.] - offsets = [1., 2./3., 0., 0., 0., 0., 0., 0., 0.] + for var in ("absYVGen", "ptVGen"): - for moments in (True, False): + hhelicities_1D = [h.project(var, "helicity") for h in helicities] + if helicity_sum is not None: + h1d_sum = helicity_sum.project(var, "helicity") - # 2D plots - xlabel = styles.xlabels.get("costhetastarll", "cosTheta") - ylabel = styles.xlabels.get("phistarll", "phi") - for i, histo in enumerate(histos): - fig = plot_tools.makeHistPlot2D(histo, cms_label=args.cmsDecor, xlabel=xlabel, ylabel=ylabel, scaleleg=args.scaleleg, has_data=False) + if plot_coefficients: + hists1d = [theory_tools.helicity_xsec_to_angular_coeffs(h) for h in hhelicities_1D] + if helicity_sum is not None: + h1d_sum = theory_tools.helicity_xsec_to_angular_coeffs(h1d_sum) + else: + hists1d = hhelicities_1D + if helicity_sum is not None: + h1d_sum = helicity_sum.project(var, "helicity") - if i == 0: - idx = "UL" - else: - idx = str(i-1) + for i in hists1d[0].axes["helicity"]: + if i == -1 and plot_coefficients: + continue - outfile = "angular_" - if moments: - outfile += "moment" - else: - outfile += "coefficient" - outfile += f"_{idx}_cosTheta_phi" - if args.postfix: - outfile += f"_{args.postfix}" - plot_tools.save_pdf_and_png(outdir, outfile) - plot_tools.write_index_and_log(outdir, outfile, args=args) + h1ds = [h[{"helicity":complex(0,i)}] for h in hists1d] - # 1D plots - for axis_name, ais in [("cosTheta", [0,1,5]), ("phi", [3,4,6,8])]: - h1ds = [histo.project(axis_name)/np.product([histo.axes[n].size for n in histo.axes.name if n != axis_name]) for histo in histos] - - fig, ax1 = plot_tools.figure(h1ds[0], xlabel=styles.xlabels.get(f"{axis_name.lower()}starll", axis_name), ylabel="Frequency", - grid=True, automatic_scale=False, width_scale=1.2, logy=False) + ylabel = f"$\mathrm{{A}}_{i}$" if plot_coefficients else r"$\sigma_{"+(r"\mathrm{UL}" if i==-1 else str(i))+r"}\,[\mathrm{pb}]$" + fig, ax1 = plot_tools.figure(h1ds[0], xlabel=styles.xlabels.get(var, var), + ylabel=ylabel, + grid=False, automatic_scale=False, width_scale=1.2, logy=False) - j=0 - for i, h1d in enumerate(h1ds): - if i not in ais: - continue - - val_x = h1d.axes[0].centers - val_y = h1d.values() - if i == 0: - idx = "\mathrm{UL}" - else: - idx = str(i-1) - if moments: - val_y 
= val_y * scales[i] + offsets[i] - label=f"$\mathrm{{M}}_{idx}$" - else: - label=f"$\mathrm{{A}}_{idx}$" - - ax1.plot(val_x, val_y, color=colors(i), linestyle=linestyles[j], label=label) - j += 1 + y_min=np.inf + y_max=-np.inf + for j, h in enumerate(h1ds): + hep.histplot( + h, + histtype="step", + color=colors(j), + label=args.labels[j], + yerr=False, + ax=ax1, + zorder=3, + density=False, + binwnorm=None if plot_coefficients else 1, + ) + y_min = min(y_min, min(h.values())) + y_max = max(y_max, max(h.values())) + + if helicity_sum is not None: + h = h1d_sum[{"helicity":complex(0,i)}] + hep.histplot( + h, + histtype="step", + color="black", + label="Sum", + linestyle="-", + yerr=False, + ax=ax1, + zorder=3, + density=False, + binwnorm=None if plot_coefficients else 1, + ) + y_min = min(y_min, min(h.values())) + y_max = max(y_max, max(h.values())) + + if not plot_coefficients: + y_min, y_max = ax1.get_ylim() + yrange = y_max-y_min + + x_min, x_max = ax1.get_xlim() + plt.plot([x_min, x_max], [0,0], color="black", linestyle="--") + + ax1.set_ylim([y_min-yrange*0.1, y_max+yrange*0.2]) plot_tools.addLegend(ax1, ncols=2, text_size=12, loc="upper left") plot_tools.fix_axes(ax1, logy=False) @@ -161,12 +142,12 @@ def load_moments_coefficients(filename, process="Z"): hep.cms.label(ax=ax1, lumi=None, fontsize=20*args.scaleleg*scale, label=args.cmsDecor, data=False) - outfile = "angular_" - if moments: - outfile += "moments" + if plot_coefficients: + outfile = f"angular_coefficient_{i}" else: - outfile += "coefficients" - outfile += f"_{axis_name}" + outfile = f"helicity_xsec_" + ("UL" if i==-1 else str(i)) + + outfile += f"_{var}" if args.postfix: outfile += f"_{args.postfix}" plot_tools.save_pdf_and_png(outdir, outfile) @@ -174,4 +155,3 @@ def load_moments_coefficients(filename, process="Z"): if output_tools.is_eosuser_path(args.outpath) and args.eoscp: output_tools.copy_to_eos(outdir, args.outpath, args.outfolder) - diff --git a/scripts/plotting/plot_harmonic_polynomials.py b/scripts/plotting/plot_harmonic_polynomials.py new file mode 100644 index 000000000..04cbf9156 --- /dev/null +++ b/scripts/plotting/plot_harmonic_polynomials.py @@ -0,0 +1,137 @@ +from utilities import logging, common +from utilities.io_tools import output_tools +from utilities.styles import styles +from wremnants import plot_tools + +import numpy as np +import matplotlib as mpl +import hist +import math +import mplhep as hep + +# harmonic polynomials +def p0(theta, phi): + return 1. + np.cos(theta)**2 + +def p1(theta, phi): + return 0.5 * (1. - 3. * np.cos(theta)**2) + +def p2(theta, phi): + return np.sin(2*theta) * np.cos(phi) + +def p3(theta, phi): + return 0.5 * np.sin(theta)**2 * np.cos(2*phi) + +def p4(theta, phi): + return np.sin(theta) * np.cos(phi) + +def p5(theta, phi): + return np.cos(theta) + +def p6(theta, phi): + return np.sin(theta)**2 * np.sin(2*phi) + +def p7(theta, phi): + return np.sin(2*theta) * np.sin(phi) + +def p8(theta, phi): + return np.sin(theta) * np.sin(phi) + +def plot_harmonic_polynomials(outdir, args): + colors = mpl.colormaps["tab10"] + linestyles = ["solid","dotted","dashed","dashdot"] + + npoints = 100 + + theta_edges = np.linspace(-np.pi, 0, npoints+1) + cosTheta_edges = np.cos(theta_edges) + theta = theta_edges[:-1] + np.diff(theta_edges)/2. + cosTheta = cosTheta_edges[:-1] + np.diff(cosTheta_edges)/2. 
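+    # Reference note (illustrative, assuming the usual Collins-Soper convention):
+    # the nine polynomials defined above multiply the angular coefficients in
+    #   dsigma/dcos(theta)dphi ~ p0 + A0*p1 + A1*p2 + A2*p3 + A3*p4 + A4*p5 + A5*p6 + A6*p7 + A7*p8,
+    # so each 2D map drawn below shows the shape that multiplies one coefficient;
+    # e.g. a pure A0 = 0.5 shape (hypothetical value) would be w = p0 + 0.5*p1.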
+
+    phi = np.linspace(-np.pi, np.pi, npoints)
+    x, y = np.meshgrid(theta, phi)
+
+    histos = []
+    for pi in [p0, p1, p2, p3, p4, p5, p6, p7, p8]:
+        histo = hist.Hist(
+            hist.axis.Variable(cosTheta_edges, name = "cosTheta", underflow=False, overflow=False),
+            hist.axis.Regular(npoints, -math.pi, math.pi, circular = True, name = "phi"),
+            storage=hist.storage.Double()
+        )
+        histo.values()[...] = pi(x,y).T
+        histos.append(histo)
+
+    # 2D plots
+    xlabel = styles.xlabels.get("costhetastarll", "cosTheta")
+    ylabel = styles.xlabels.get("phistarll", "phi")
+    for i, histo in enumerate(histos):
+        fig = plot_tools.makeHistPlot2D(histo, cms_label=args.cmsDecor, xlabel=xlabel, ylabel=ylabel, scaleleg=args.scaleleg, has_data=False)
+
+        if i == 0:
+            idx = "UL"
+        else:
+            idx = str(i-1)
+
+        outfile = "polynomial"
+        outfile += f"_{idx}_cosTheta_phi"
+        if args.postfix:
+            outfile += f"_{args.postfix}"
+        plot_tools.save_pdf_and_png(outdir, outfile)
+        plot_tools.write_index_and_log(outdir, outfile, args=args)
+
+    # 1D plots
+    for axis_name, ais in [("cosTheta", [0,1,5]), ("phi", [3,4,6,8])]:
+        h1ds = [histo.project(axis_name)/np.prod([histo.axes[n].size for n in histo.axes.name if n != axis_name]) for histo in histos]
+
+        fig, ax1 = plot_tools.figure(h1ds[0], xlabel=styles.xlabels.get(f"{axis_name.lower()}starll", axis_name), ylabel="Frequency",
+            grid=True, automatic_scale=False, width_scale=1.2, logy=False)
+
+        j=0
+        for i, h1d in enumerate(h1ds):
+            if i not in ais:
+                continue
+
+            val_x = h1d.axes[0].centers
+            val_y = h1d.values()
+            if i == 0:
+                idx = "\mathrm{UL}"
+            else:
+                idx = str(i-1)
+            label=f"$\mathrm{{P}}_{idx}$"
+
+            ax1.plot(val_x, val_y, color=colors(i), linestyle=linestyles[j], label=label)
+            j += 1
+
+        plot_tools.addLegend(ax1, ncols=2, text_size=12, loc="upper left")
+        plot_tools.fix_axes(ax1, logy=False)
+
+        scale = max(1, np.divide(*ax1.get_figure().get_size_inches())*0.3)
+        hep.cms.label(ax=ax1, lumi=None, fontsize=20*args.scaleleg*scale,
+            label=args.cmsDecor, data=False)
+
+        outfile = "harmonic_polynomial"
+        outfile += f"_{axis_name}"
+        if args.postfix:
+            outfile += f"_{args.postfix}"
+        plot_tools.save_pdf_and_png(outdir, outfile)
+        plot_tools.write_index_and_log(outdir, outfile, args=args)
+
+if __name__ == '__main__':
+    parser = common.plot_parser()
+    args = parser.parse_args()
+    logger = logging.setup_logger(__file__, args.verbose, args.noColorLogger)
+
+    outdir = output_tools.make_plot_dir(args.outpath, args.outfolder, eoscp=args.eoscp)
+
+    plot_harmonic_polynomials(outdir, args)
+
+    if output_tools.is_eosuser_path(args.outpath) and args.eoscp:
+        output_tools.copy_to_eos(outdir, args.outpath, args.outfolder)
+
diff --git a/scripts/plotting/postfitPlots.py b/scripts/plotting/postfitPlots.py
index b42168b4f..99457666c 100644
--- a/scripts/plotting/postfitPlots.py
+++ b/scripts/plotting/postfitPlots.py
@@ -314,7 +314,7 @@ def make_plots(hist_data, hist_inclusive, hist_stack, axes, channel="", *opts, *
         "etaAbsEta": hist.axis.Variable([-2.4, -2.0, -1.6, -1.4, -1.2, -1.0, -0.6, 0.0, 0.6, 1.0, 1.2, 1.4, 1.6, 2.0, 2.4], name = "etaAbsEta", overflow=False, underflow=False),
         "cosThetaStarll": hist.axis.Regular(2, -1., 1., name = "cosThetaStarll", underflow=False, overflow=False),
         "yll": hist.axis.Regular(20, -2.5, 2.5, name = "yll", overflow=False, underflow=False),
-        "ptll": hist.axis.Variable([0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 20, 23, 27, 32, 
40, 54, 100], name = "ptll", underflow=False, overflow=False), + "ptll": hist.axis.Variable(common.get_dilepton_ptV_binning(False), name = "ptll", underflow=False, overflow=False), } elif analysis=="ZMassWLike": all_axes = { diff --git a/scripts/plotting/response_matrix.py b/scripts/plotting/response_matrix.py index 655d504d7..ae177ce10 100644 --- a/scripts/plotting/response_matrix.py +++ b/scripts/plotting/response_matrix.py @@ -3,6 +3,7 @@ import mplhep as hep import matplotlib.pyplot as plt import numpy as np +import hist from wremnants import logging, common from wremnants import plot_tools @@ -10,6 +11,8 @@ from utilities import boostHistHelpers as hh from utilities.io_tools import output_tools +import pdb + parser = common.plot_parser() parser.add_argument("infile", help="Output file of the analysis stage, containing ND boost histogrdams") parser.add_argument("--procFilters", type=str, nargs="*", default="Zmumu", help="Filter to plot (default no filter, only specify if you want a subset") @@ -48,13 +51,18 @@ translate_label = { "pt" : "$\mathrm{Reco}\ p_\mathrm{T}\ [\mathrm{GeV}]$", "ptGen" : "$\mathrm{Gen}\ p_\mathrm{T}\ [\mathrm{GeV}]$", + "eta" : "$\mathrm{Reco}\ \eta$", "abs(eta)" : "$\mathrm{Reco}\ |\eta|$", "absEtaGen" : "$\mathrm{Gen}\ |\eta|$", "ptll" : r"$\mathrm{Reco}\ p_\mathrm{T}(\ell\ell)\ [\mathrm{GeV}]$", "ptW" : r"$\mathrm{Reco}\ p_\mathrm{T}(\ell\nu)\ [\mathrm{GeV}]$", "ptVGen" : "$\mathrm{Gen}\ p_\mathrm{T}(V)\ [\mathrm{GeV}]$", + "yll" : "$\mathrm{Reco}\ Y(\mathrm{V})$", "abs(yll)" : "$\mathrm{Reco}\ |Y(\mathrm{V})|$", "absYVGen" : "$\mathrm{Gen}\ |Y(\mathrm{V})|$", + "cosThetaStarll" : r"$\mathrm{Reco}\ \cos{\theta^{\star}_{\ell\ell}}$", + "phiStarll" : r"$\mathrm{Reco}\ \phi^{\star}_{\ell\ell}$", + "helicitySig" : "Helicity", } @@ -83,36 +91,224 @@ def get_stability(matrix, xbins, ybins): # stability is same computation as purity with inverted axes return get_purity(matrix.T, ybins, xbins) +def plot_resolution(histo, axes_reco, axis_gen, selections_global, selections_slices, suffix=None, normalize=False): + # plot slices of gen bins in 1D reco space + + if isinstance(axes_reco, str): + axes_reco = [axes_reco] + for i, a in enumerate(axes_reco): + if a.startswith("abs("): + axes_reco[i] = a[4:-1] + histo = hh.makeAbsHist(histo, a[4:-1], rename=False) + + if len(axes_reco) == 1: + xlabel = translate_label[axes_reco[0]] + else: + xlabel = '-'.join([translate_label[a].replace('[\mathrm{GeV}]', '') for a in axes_reco]) + xlabel = xlabel.replace(r"-$\mathrm{Reco}", "-$") + ' bin' + + for sel, idx in selections_global: + if sel is not None: + if histo.axes[sel].size-1 < idx: + continue + h2d = histo[{sel:idx}].project(axis_gen, *axes_reco) + else: + h2d = histo.project(axis_gen, *axes_reco) + + fig = plt.figure(figsize=(10,6)) + ax = fig.add_subplot() + ax.set_xlabel(xlabel) + if normalize: + ylabel = "Frequency" + else: + ylabel = "Events" + if xlabel.endswith("bin"): + ylabel += " / bin" + else: + ylabel += " / unit" + + ax.set_ylabel(ylabel) + + xedges = None + for sel2, idx2 in selections_slices: + if h2d.axes[sel2].size-1 < idx2: + continue + + if isinstance(h2d.axes[sel2], hist.axis.Integer): + label = int(h2d.axes[sel2].edges[idx2]) + if sel2 == "helicitySig": + if idx2 == 0: + label = "$\sigma_{\mathrm{UL}}$" + else: + label = f"$\sigma_{label}$" + else: + edges = h2d.axes[sel2].edges + var2 = translate_label[sel2].replace('[\mathrm{GeV}]', '') + if idx2 == hist.overflow: + label = f"{var2} > {edges[-1]}" + elif idx2+1 < len(edges): + lo, hi = edges[idx2], 
edges[idx2+1] + label = f"{lo} < {var2} < {hi}" + h1d = h2d[{sel2:idx2}] + if len(axes_reco) > 1: + h1d = hh.unrolledHist(h1d, binwnorm=None, obs=axes_reco) + + values = h1d.values() + if len(axes_reco) == 1: + values /= np.diff(h1d.axes[0].edges) + + if normalize: + values /= abs(values).sum() + + ax.stairs(values, edges=h1d.axes[0].edges, label=label) + + if xedges is None: + xedges = h1d.axes[0].edges + + ax.set_xlim([min(xedges), max(xedges)]) + + y_min, y_max = ax.get_ylim() + ax.set_ylim([min(0,y_min), y_max*1.5]) + + # Use scientific notation + ax.ticklabel_format(style='sci', axis='y', scilimits=(-2,2)) + + # move scientific notation (e.g. 10^5) a bit to the left + offset_text = ax.get_yaxis().get_offset_text() + offset_text.set_position((-0.08,1.02)) + + if sel is not None: + lo, hi = histo.axes[sel].edges[idx], histo.axes[sel].edges[idx+1] + var = translate_label[sel].replace('[\mathrm{GeV}]', '') + if sel.startswith("abs") and lo==0: + title = f"{var} < {hi}" + else: + title = f"{lo} < {var} < {hi}" + + plt.text(0.06, 0.94, title, horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=20) + + hep.cms.label(ax=ax, fontsize=20, label=args.cmsDecor, data=False) + plot_tools.addLegend(ax, ncols=np.ceil(len(selections_slices)/2), text_size=15*args.scaleleg) + outfile = f"resolution_{g_name}_{'_'.join(axes_reco)}" + + if sel is not None: + outfile += f"_{sel}{idx}" + if suffix: + outfile += f"_{suffix}" + plot_tools.save_pdf_and_png(outdir, outfile) + + plot_tools.write_index_and_log(outdir, outfile, + analysis_meta_info={args.infile : groups.getMetaInfo()}, + args=args, + ) for g_name, group in datagroups.items(): - hist = group.hists[args.histName] + histo = group.hists[args.histName] for channel in args.channels: select = {} if channel == "all" else {"charge" : -1.j if channel == "minus" else 1.j} + # plot slices of resolution + if all(x in histo.axes.name for x in ["pt", "ptGen", "absEtaGen"]): + plot_resolution( + histo, axes_reco="pt", axis_gen="ptGen", + selections_global=(("absEtaGen",0), ("absEtaGen",17)), + selections_slices=(("ptGen",0), ("ptGen",6), ("ptGen",13)), + ) + if all(x in histo.axes.name for x in ["ptGen", "eta", "absEtaGen"]): + plot_resolution( + histo, axes_reco="abs(eta)", axis_gen="absEtaGen", + selections_global=(("ptGen",0), ("ptGen",13)), + selections_slices=(("absEtaGen",0), ("absEtaGen",8), ("absEtaGen",17)), + ) + + if all(x in histo.axes.name for x in ["ptll", "ptVGen", "absYVGen"]): + plot_resolution( + histo, axes_reco="ptll", axis_gen="ptVGen", + selections_global=(("absYVGen",0), ("absYVGen",3),), + selections_slices=(("ptVGen",0), ("ptVGen",8), ("ptVGen",hist.overflow)), + ) + if all(x in histo.axes.name for x in ["ptll", "ptVGen", "absYVGen"]): + plot_resolution( + histo, axes_reco="abs(yll)", axis_gen="absYVGen", + selections_global=(("ptVGen",0), ("ptVGen",10),), + selections_slices=(("absYVGen",0), ("absYVGen",2), ("absYVGen",hist.overflow)), + ) + + if all(x in histo.axes.name for x in ["cosThetaStarll", "helicitySig"]): + plot_resolution( + histo, axes_reco="cosThetaStarll", axis_gen="helicitySig", + selections_global=((None, None),), + selections_slices=([("helicitySig",i) for i in range(0,9)]), + normalize=True, + ) + if all(x in histo.axes.name for x in ["phiStarll", "helicitySig"]): + plot_resolution( + histo, axes_reco="phiStarll", axis_gen="helicitySig", + selections_global=((None, None),), + selections_slices=([("helicitySig",i) for i in range(0,9)]), + normalize=True, + ) + + if all(x in 
histo.axes.name for x in ["cosThetaStarll","phiStarll", "helicitySig"]): + plot_resolution( + histo, axes_reco=["cosThetaStarll","phiStarll"], axis_gen="helicitySig", + selections_global=((None, None),), + selections_slices=([("helicitySig",i) for i in range(0,9)]), + normalize=True, + ) + for i in range(0,9): + plot_resolution( + histo, axes_reco=["cosThetaStarll","phiStarll"], axis_gen="helicitySig", + selections_global=((None, None),), + selections_slices=(("helicitySig",i),), + suffix=f"HelicityIdx{i}", normalize=False, + ) + plot_resolution( + histo, axes_reco="ptll", axis_gen="helicitySig", + selections_global=((None, None),), + selections_slices=(("helicitySig",i),), + suffix=f"HelicityIdx{i}", normalize=False, + ) + plot_resolution( + histo, axes_reco="abs(yll)", axis_gen="helicitySig", + selections_global=((None, None),), + selections_slices=(("helicitySig",i),), + suffix=f"HelicityIdx{i}", normalize=False, + ) + for axes_string in args.axes: axes = axes_string.split("-") - if (groups.mode[0] == "w" or "wlike" in groups.mode) and axes[0] == "pt": + if ( + ((groups.mode[0] == "w" or "wlike" in groups.mode) and axes[1] == "ptGen") or + ((groups.mode[0] == "z") and axes[1] == "ptVGen") + ): genFlow=True else: genFlow=False if axes[0].startswith("abs("): # mirror axis at half - hist2d = hist[select].project(axes[0][4:-1], *axes[1:]) + hist2d = histo[select].project(axes[0][4:-1], *axes[1:]) nbins = len(hist2d.axes.edges[0])-1 values = hist2d.values(flow=genFlow)[:int(nbins/2)][::-1] + hist2d.values(flow=genFlow)[int(nbins/2):] xbins = hist2d.axes[0].edges[int(nbins/2):] else: - hist2d = hist[select].project(*axes) + hist2d = histo[select].project(*axes) values = hist2d.values(flow=genFlow) xbins = hist2d.axes[0].edges + if np.sum(values<0): + logger.warning(f"Found {np.sum(values<0)} negative entries {values[values<0]}. Setting to zero") + values[values<0] = 0 + ybins = hist2d.axes[1].edges - if genFlow: - ybins = np.array([ybins[0]-(ybins[1]-ybins[0]), *ybins, ybins[-1]+(ybins[-1]-ybins[-2])]) + if genFlow and hist2d.axes[1].traits.underflow: + ybins = np.array([xbins[0], *ybins]) + if genFlow and hist2d.axes[1].traits.overflow: + ybins = np.array([*ybins, xbins[-1]]) outname = g_name+"_"+"_".join([a.replace("(","").replace(")","") for a in axes]) @@ -139,7 +335,6 @@ def get_stability(matrix, xbins, ybins): plot_tools.save_pdf_and_png(outdir, outfile) - # plot stability fig = plt.figure(figsize=(8,4)) ax = fig.add_subplot() @@ -162,7 +357,6 @@ def get_stability(matrix, xbins, ybins): outfile = "stability_"+outname plot_tools.save_pdf_and_png(outdir, outfile) - # plot response matrix fig = plt.figure()#figsize=(8*width,8)) ax = fig.add_subplot() @@ -174,7 +368,7 @@ def get_stability(matrix, xbins, ybins): # calculate condition number nans = np.isnan(values) - nancount = np.count_nonzero(nans) + nancount = nans.sum() if nancount: logger.warning(f"Found {nancount} NaNs in positions {np.argwhere(nans)}. 
Setting to zero") values[nans] = 0 diff --git a/scripts/plotting/unfolding_xsec.py b/scripts/plotting/unfolding_xsec.py index 3ec1c2b2b..35b46feef 100644 --- a/scripts/plotting/unfolding_xsec.py +++ b/scripts/plotting/unfolding_xsec.py @@ -12,30 +12,37 @@ from utilities import boostHistHelpers as hh, common, logging from utilities.styles import styles -from utilities.io_tools import input_tools, output_tools -from utilities.io_tools.conversion_tools import fitresult_pois_to_hist +from utilities.io_tools import input_tools, output_tools, conversion_tools from wremnants.datasets.datagroups import Datagroups from wremnants import plot_tools import pdb hep.style.use(hep.style.ROOT) +poi_type_choices = ["nois", "mu", "pmaskedexp", "pmaskedexpnorm", "sumpois", "sumpoisnorm", "chargepois", "ratiometapois", "helpois", "helmetapois"] + parser = common.plot_parser() parser.add_argument("infile", type=str, help="Combine fitresult file") -parser.add_argument("--reference", type=str, default=None, help="Optional combine fitresult file from an reference fit for comparison") -parser.add_argument("--refName", type=str, default="Reference model", help="Name for reference source") +parser.add_argument("--name", type=str, default="Unfolded data", help="Name for main source") parser.add_argument("-r", "--rrange", type=float, nargs=2, default=[0.9,1.1], help="y range for ratio plot") +parser.add_argument("--ylabel", type=str, default=None, help="Specify a y-axis label (if not it will be set automatic)") parser.add_argument("--ylim", type=float, nargs=2, help="Min and max values for y axis (if not specified, range set automatically)") parser.add_argument("--logy", action='store_true', help="Make the yscale logarithmic") parser.add_argument("--yscale", type=float, help="Scale the upper y axis by this factor (useful when auto scaling cuts off legend)") parser.add_argument("--noData", action='store_true', help="Don't plot data") +parser.add_argument("--pulls", action='store_true', help="Make ratio as pulls between data and reference inputs") parser.add_argument("--plots", type=str, nargs="+", default=["xsec", "uncertainties"], choices=["xsec", "uncertainties", "ratio"], help="Define which plots to make") -parser.add_argument("--selectionAxes", type=str, default=["qGen", ], +parser.add_argument("--selectionAxes", type=str, default=["qGen", "helicitySig", "A"], help="List of axes where for each bin a seperate plot is created") parser.add_argument("--genFlow", action='store_true', help="Show overflow/underflow pois") parser.add_argument("--poiTypes", type=str, nargs="+", default=["pmaskedexp", "sumpois"], help="POI types used for the plotting", - choices=["noi", "mu", "pmaskedexp", "pmaskedexpnorm", "sumpois", "sumpoisnorm", ]) -parser.add_argument("--scaleXsec", type=float, default=1.0, help="Scale xsec predictions with this number") + choices=poi_type_choices) +# specify a reference unfolding +parser.add_argument("--reference", type=str, default=None, help="Optional combine fitresult file from an reference fit for comparison") +parser.add_argument("--refName", type=str, default="Reference model", help="Name for reference source") +parser.add_argument("--poiTypeReference", type=str, default=None, help="POI types used for the plotting of the reference", + choices=poi_type_choices) + parser.add_argument("--grouping", type=str, default=None, help="Select nuisances by a predefined grouping", choices=styles.nuisance_groupings.keys()) parser.add_argument("--ratioToPred", action='store_true', help="Use prediction as 
denominator in ratio") parser.add_argument("-t","--translate", type=str, default=None, help="Specify .json file to translate labels") @@ -45,6 +52,7 @@ parser.add_argument("-n", "--baseName", type=str, help="Histogram name in the file (e.g., 'xnorm', 'nominal', ...)", default="xnorm") parser.add_argument("--varNames", default=['uncorr'], type=str, nargs='+', help="Name of variation hist") parser.add_argument("--varLabels", default=['MiNNLO'], type=str, nargs='+', help="Label(s) of variation hist for plotting") +parser.add_argument("--varMarkers", default=['*','v','^','x'], type=str, nargs='+', help="Label(s) of variation hist for plotting") parser.add_argument("--selectAxis", default=[], type=str, nargs='*', help="If you need to select a variation axis") parser.add_argument("--selectEntries", default=[], type=str, nargs='*', help="entries to read from the selected axis") parser.add_argument("--colors", default=['red',], type=str, nargs='+', help="Variation colors") @@ -58,9 +66,14 @@ if args.reference is not None and args.reference.endswith(".root"): args.reference = args.reference.replace(".root", ".hdf5") -result, meta = fitresult_pois_to_hist(args.infile, poi_types=args.poiTypes, uncertainties=None, translate_poi_types=False) +result, meta = conversion_tools.fitresult_pois_to_hist(args.infile, poi_types=args.poiTypes, uncertainties=None, translate_poi_types=False, merge_gen_charge_W=False) + if args.reference: - result_ref, meta_ref = fitresult_pois_to_hist(args.reference, poi_types=args.poiTypes, uncertainties=None, translate_poi_types=False) + if args.poiTypeReference is None: + poi_types_ref = args.poiTypes + else: + poi_types_ref = [args.poiTypeReference,] + result_ref, meta_ref = conversion_tools.fitresult_pois_to_hist(args.reference, poi_types=poi_types_ref, uncertainties=None, translate_poi_types=False, merge_gen_charge_W=False) grouping = styles.nuisance_groupings.get(args.grouping, None) @@ -115,17 +128,16 @@ def sum_and_unc(h,scale=100 if percentage else 1): return pd.DataFrame(entries, columns=columns) def plot_xsec_unfolded(hist_xsec, hist_xsec_stat=None, hist_ref=None, poi_type="mu", channel="ch0", proc="W", - hist_others=[], label_others=[], color_others=[], lumi=None + hist_others=[], label_others=[], marker_others=[], color_others=[], lumi=None, pulls=False ): # ratio to data if there is no other histogram to make a ratio from ratioToData = not args.ratioToPred or len(hist_others) == 0 logger.info(f"Make unfoled {poi_type} plot in channel {channel}") - - yLabel = styles.poi_types[poi_type] + yLabel = args.ylabel if args.ylabel else styles.poi_types.get(poi_type, poi_type) # unroll histograms - binwnorm = 1 if poi_type not in ["noi", "mu",] else None + binwnorm = None if poi_type in ["nois", "mu", "chargepois", "ratiometapois", "helpois", "helmetapois"] else 1 axes_names = hist_xsec.axes.name if len(axes_names) > 1: hist_xsec = hh.unrolledHist(hist_xsec, binwnorm=binwnorm, add_flow_bins=args.genFlow) @@ -139,32 +151,42 @@ def plot_xsec_unfolded(hist_xsec, hist_xsec_stat=None, hist_ref=None, poi_type=" edges = hist_xsec.axes.edges[0] binwidths = np.diff(edges) + centers = hist_xsec.axes.centers[0] - if ratioToData: - rlabel="Pred./Data" + if binwnorm==1: + yLabel = yLabel+"/unit" + else: + binwidths = np.ones_like(binwidths) + + if pulls: + rlabel=f"Pulls" + elif ratioToData: + # rlabel=f"{args.refName}/{args.name}" + rlabel=f"1/{args.name}" else: rlabel=f"Data/{label_others[0]}" + rrange = args.rrange + unc = np.sqrt(hist_xsec.variances())/binwidths + y = 
+    y = hist_xsec.values()/binwidths
+
     if args.ylim is None:
-        ylim = (0, 1.1 * max( (hist_xsec.values()+np.sqrt(hist_xsec.variances()))/(binwidths if binwnorm is not None else 1)) )
+        ymax = max(y+unc)
+        ymin = min(y-unc)
+        yrange = ymax-ymin
+        ylim = (min(0, ymin - 0.1 * yrange), max(0, ymax + 0.1 * yrange))
     else:
         ylim = args.ylim
 
     # make plots
     fig, ax1, ax2 = plot_tools.figureWithRatio(hist_xsec, xlabel, yLabel, ylim, rlabel, rrange, width_scale=2)
-    hep.histplot(
-        hist_xsec,
-        yerr=np.sqrt(hist_xsec.variances()),
-        histtype="errorbar",
-        color="black",
-        label="Unfolded data",
-        ax=ax1,
-        alpha=1.,
-        zorder=2,
-        binwnorm=binwnorm
-    )
+    # draw central values as horizontal lines, with total/stat uncertainties as bands
+    ax1.hlines(y, edges[:-1], edges[1:], colors="black", label=args.name)
+    ax1.bar(centers, height=2*unc, bottom=y-unc, width=edges[1:] - edges[:-1], color="silver", label="Total")
+    if hist_xsec_stat is not None:
+        unc_stat = np.sqrt(hist_xsec_stat.variances())/binwidths
+        ax1.bar(centers, height=2*unc_stat, bottom=y-unc_stat, width=edges[1:] - edges[:-1], color="gold", label="Stat")
 
     if args.genFlow: #TODO TEST
         ax1.fill([0,18.5, 18.5, 0,0], [ylim[0],*ylim,ylim[1],ylim[0]], color="grey", alpha=0.3)
@@ -173,85 +195,69 @@ def plot_xsec_unfolded(hist_xsec, hist_xsec_stat=None, hist_ref=None, poi_type="
         ax2.fill([0,18.5, 18.5, 0,0], [rrange[0],*rrange,rrange[1],rrange[0]], color="grey", alpha=0.3)
         ax2.fill([len(edges)-17.5, len(edges)+0.5, len(edges)+0.5, len(edges)-17.5, len(edges)-17.5], [rrange[0],*rrange,rrange[1],rrange[0]], color="grey", alpha=0.3)
 
-    centers = hist_xsec.axes.centers[0]
-
-    if ratioToData:
-        unc_ratio = np.sqrt(hist_xsec.variances()) /hist_xsec.values()
+    if ratioToData and not pulls:
+        unc_ratio = unc / y
         ax2.bar(centers, height=2*unc_ratio, bottom=1-unc_ratio, width=edges[1:] - edges[:-1], color="silver", label="Total")
         if hist_xsec_stat is not None:
-            unc_ratio_stat = np.sqrt(hist_xsec_stat.variances()) / hist_xsec_stat.values()
+            unc_ratio_stat = unc_stat / y
            ax2.bar(centers, height=2*unc_ratio_stat, bottom=1-unc_ratio_stat, width=edges[1:] - edges[:-1], color="gold", label="Stat")
 
-    ax2.plot([min(edges), max(edges)], [1,1], color="black", linestyle="-")
+    if pulls:
+        ax2.plot([min(edges), max(edges)], [0, 0], color="black", linestyle="-")
+    else:
+        ax2.plot([min(edges), max(edges)], [1, 1], color="black", linestyle="-")
 
     if ratioToData:
         hden=hist_xsec
-    for i, (h, l, c) in enumerate(zip(hist_others, label_others, color_others)):
-        # h_flat = hist.Hist(
-        #     hist.axis.Variable(edges, underflow=False, overflow=False), storage=hist.storage.Weight())
-        # h_flat.view(flow=False)[...] 
= np.stack([h.values(flow=args.genFlow).flatten()/bin_widths, h.variances(flow=args.genFlow).flatten()/bin_widths**2], axis=-1) + for i, (h, l, m, c) in enumerate(zip(hist_others, label_others, marker_others, color_others)): + + y = h.values()/binwidths + ax1.plot(centers, y, linewidth=0, marker=m, color=c, label=l) + + if not pulls: + if i==0 and not ratioToData: + hden=h + hep.histplot( + hh.divideHists(h, hden, cutoff=0, rel_unc=True), + yerr=False, + histtype="step", + color="black", + ax=ax2, + zorder=2, + ) + continue + + y = hh.divideHists(h, hden, cutoff=0, rel_unc=True).values() + ax2.plot(centers, y, linewidth=0, marker=m, color=c) - hep.histplot( - h, - yerr=False, - histtype="step", - color=c, - label=l, - ax=ax1, - alpha=1., - zorder=2, - binwnorm=binwnorm - ) - if i==0 and not ratioToData: - hden=h + if hist_ref is not None: + y = hist_ref.values()/binwidths + ax1.plot(centers, y, linewidth=0, marker='o', color="blue", label=args.refName) + + if pulls: + hdiff = hh.addHists(hist_ref, hden, scale2=-1.) + pull_values = hdiff.values() / np.sqrt(hden.variances()) + hdiff.values()[...] = pull_values + + logger.info(f"Min/Max pull value is {pull_values.min()}/{pull_values.max()}") + hep.histplot( - hh.divideHists(h, hden, cutoff=0, rel_unc=True), - yerr=True, + hdiff, + yerr=False, histtype="errorbar", color="black", + # label="Model", ax=ax2, - zorder=2, - binwnorm=binwnorm - ) - continue - - hep.histplot( - hh.divideHists(h, hden, cutoff=0, rel_unc=True), - yerr=False, - histtype="step", - color=c, - ax=ax2, - zorder=2, - binwnorm=binwnorm - ) + ) + else: + hr = hh.divideHists(hist_ref, hden, cutoff=0, rel_unc=True) + y = hr.values() + ax2.plot(centers, y, linewidth=0, marker='o', color="blue") - if hist_ref is not None: - hep.histplot( - hist_ref, - yerr=False, - histtype="step", - color="blue", - label=args.refName, - ax=ax1, - alpha=1., - zorder=2, - binwnorm=binwnorm - ) - - hep.histplot( - hh.divideHists(hist_ref, hden, cutoff=0, rel_unc=True), - yerr=False, - histtype="step", - color="blue", - # label="Model", - ax=ax2, - binwnorm=binwnorm - ) plot_tools.addLegend(ax1, ncols=2, text_size=15*args.scaleleg) - plot_tools.addLegend(ax2, ncols=2, text_size=15*args.scaleleg) plot_tools.fix_axes(ax1, ax2, yscale=args.yscale) scale = max(1, np.divide(*ax1.get_figure().get_size_inches())*0.3) @@ -263,6 +269,8 @@ def plot_xsec_unfolded(hist_xsec, hist_xsec_stat=None, hist_ref=None, poi_type=" outfile += "_"+"_".join(axes_names) outfile += (f"_{channel}" if channel else "") outfile += (f"_{args.postfix}" if args.postfix else "") + if pulls: + outfile += "_pulls" plot_tools.save_pdf_and_png(outdir, outfile) if hist_ref is not None: @@ -277,15 +285,15 @@ def plot_xsec_unfolded(hist_xsec, hist_xsec_stat=None, hist_ref=None, poi_type=" plt.close() def plot_uncertainties_unfolded(hist_xsec, hist_stat, hist_syst, poi_type, channel="ch0", proc="W", - logy=False, relative_uncertainty=False, percentage=True, lumi=None, - error_threshold=0.001, # only uncertainties are shown with a max error larger than this threshold + logy=False, relative_uncertainty=True, percentage=True, lumi=None, + error_threshold=0.001, flow=False, # only uncertainties are shown with a max error larger than this threshold ): logger.info(f"Make unfoled {poi_type} plot"+(f" in channel {channel}" if channel else "")) # read nominal values and uncertainties from fit result and fill histograms logger.debug(f"Produce histograms") - yLabel = styles.poi_types[poi_type] + yLabel = styles.poi_types.get(poi_type, poi_type) if 
relative_uncertainty:
         yLabel = "$\delta$ "+ yLabel
         yLabel = yLabel.replace(" [pb]","")
@@ -299,22 +307,23 @@ def plot_uncertainties_unfolded(hist_xsec, hist_stat, hist_syst, poi_type, chann
     scale = 100 if percentage else 1
     if relative_uncertainty:
-        scale = scale / hist_xsec.values()#/binwidths
+        scale = scale / hist_xsec.values(flow=flow)#/binwidths
 
-    err = np.sqrt(hist_xsec.variances(flow=True)) * scale
-    err_stat = np.sqrt(hist_stat.variances(flow=True)) * scale
-    err_syst = hh.addHists(hist_syst, hist_xsec, scale2=-1).values(flow=True) * (scale[...,np.newaxis] if relative_uncertainty else scale)
+    err = np.sqrt(hist_xsec.variances(flow=flow)) * scale
+    err_stat = np.sqrt(hist_stat.variances(flow=flow)) * scale
+    err_syst = hh.addHists(hist_syst, hist_xsec, scale2=-1).values(flow=flow) * (scale[...,np.newaxis] if relative_uncertainty else scale)
+    # err_syst = hist_syst.values(flow=flow) * (scale[...,np.newaxis] if relative_uncertainty else scale)
 
     hist_err = hist.Hist(*hist_xsec.axes, storage=hist.storage.Double())
     hist_err_stat = hist.Hist(*hist_stat.axes, storage=hist.storage.Double())
     hist_err_syst = hist.Hist(*hist_syst.axes, storage=hist.storage.Double())
 
-    hist_err.view(flow=True)[...] = err
-    hist_err_stat.view(flow=True)[...] = err_stat
-    hist_err_syst.view(flow=True)[...] = err_syst
+    hist_err.view(flow=flow)[...] = err
+    hist_err_stat.view(flow=flow)[...] = err_stat
+    hist_err_syst.view(flow=flow)[...] = err_syst
 
     # unroll histograms
-    binwnorm = 1 if poi_type not in ["noi", "mu",] else None
+    binwnorm = 1 if not relative_uncertainty and poi_type not in ["noi", "mu",] else None
     if len(axes_names) > 1:
         hist_err = hh.unrolledHist(hist_err, binwnorm=binwnorm, add_flow_bins=args.genFlow)
         hist_err_stat = hh.unrolledHist(hist_err_stat, binwnorm=binwnorm, add_flow_bins=args.genFlow)
@@ -357,7 +366,7 @@ def plot_uncertainties_unfolded(hist_xsec, hist_stat, hist_syst, poi_type, chann
     cm = mpl.colormaps["gist_rainbow"]
     # add each systematic uncertainty
     i=0
-    for syst in syst_labels[::-1]:
+    for syst in syst_labels:
         if len(axes_names) > 1:
             hist_err_syst_i = hh.unrolledHist(hist_err_syst[{"syst": syst}], binwnorm=binwnorm, add_flow_bins=args.genFlow)
         else:
@@ -435,138 +444,185 @@ def plot_uncertainties_unfolded(hist_xsec, hist_stat, hist_syst, poi_type, chann
     plt.close()
 
-
-def plot_uncertainties_ratio(df, df_ref, poi_type, poi_type_ref, channel=None, edges=None, scale=1., normalize=False,
-    logy=False, process_label="", axes=None, relative_uncertainty=False, lumi=None
+def plot_uncertainties_with_ratio(
+    hist_xsec, hist_xsec_ref, poi_type, poi_type_ref,
+    hist_stat=None, hist_syst=None, hist_stat_ref=None, hist_syst_ref=None,
+    logy=False, relative_uncertainty=True, percentage=True, lumi=None,
+    channel="ch0", proc="W", normalize=False, flow=False, error_threshold=0.001
     ):
     logger.info(f"Make "+("normalized " if normalize else "")+"unfoled xsec plot"+(f" in channel {channel}" if channel else ""))
 
     # read nominal values and uncertainties from fit result and fill histograms
     logger.debug(f"Produce histograms")
 
-    yLabel = f"ref.({poi_type_ref}) / nom.({poi_type}) -1"
-
-    if poi_type in ["mu", "nois"]:
-        bin_widths = 1.0
+    yLabel = styles.poi_types.get(poi_type, poi_type)
+    if relative_uncertainty:
+        yLabel = "$\delta$ "+ yLabel
+        yLabel = yLabel.replace(" [pb]","")
+        if percentage:
+            yLabel += " [%]"
     else:
-        # central values
-        bin_widths = edges[1:] - edges[:-1]
+        yLabel = "$\Delta$ "+ yLabel
+
+    axes_names = 
hist_xsec.axes.name + xlabel=get_xlabel(axes_names, proc) - errors = df["err_total"].values/bin_widths - errors_ref = df_ref["err_total"].values/bin_widths + scale = 100 if percentage else 1 if relative_uncertainty: - values = df["value"].values/bin_widths - values_ref = df_ref["value"].values/bin_widths - errors /= values - errors_ref /= values_ref + scale = scale / hist_xsec.values(flow=flow) - xx = errors_ref/errors -1 + err = np.sqrt(hist_xsec.variances(flow=flow)) * scale + hist_err = hist.Hist(*hist_xsec.axes, storage=hist.storage.Double()) + hist_err.view(flow=flow)[...] = err - hist_ratio = hist.Hist(hist.axis.Variable(edges, underflow=False, overflow=False)) - hist_ratio.view(flow=False)[...] = xx + err_ref = np.sqrt(hist_xsec_ref.variances(flow=flow)) * scale + hist_err_ref = hist.Hist(*hist_xsec.axes, storage=hist.storage.Double()) + hist_err_ref.view(flow=flow)[...] = err_ref + + if hist_stat is not None: + err_stat = np.sqrt(hist_stat.variances(flow=flow)) * scale + hist_err_stat = hist.Hist(*hist_stat.axes, storage=hist.storage.Double()) + hist_err_stat.view(flow=flow)[...] = err_stat + + err_stat_ref = np.sqrt(hist_stat_ref.variances(flow=flow)) * scale + hist_err_stat_ref = hist.Hist(*hist_stat.axes, storage=hist.storage.Double()) + hist_err_stat_ref.view(flow=flow)[...] = err_stat_ref + + if hist_syst is not None: + err_syst = hh.addHists(hist_syst, hist_xsec, scale2=-1).values(flow=flow) * (scale[...,np.newaxis] if relative_uncertainty else scale) + hist_err_syst = hist.Hist(*hist_syst.axes, storage=hist.storage.Double()) + hist_err_syst.view(flow=flow)[...] = err_syst + + err_syst_ref = hh.addHists(hist_syst_ref, hist_xsec, scale2=-1).values(flow=flow) * (scale[...,np.newaxis] if relative_uncertainty else scale) + hist_err_syst_ref = hist.Hist(*hist_syst.axes, storage=hist.storage.Double()) + hist_err_syst_ref.view(flow=flow)[...] 
= err_syst_ref + + # unroll histograms + binwnorm = 1 if not relative_uncertainty and poi_type not in ["noi", "mu",] else None + if len(axes_names) > 1: + hist_err = hh.unrolledHist(hist_err, binwnorm=binwnorm, add_flow_bins=args.genFlow) + hist_err_ref = hh.unrolledHist(hist_err_ref, binwnorm=binwnorm, add_flow_bins=args.genFlow) + if hist_stat is not None: + hist_err_stat = hh.unrolledHist(hist_err_stat, binwnorm=binwnorm, add_flow_bins=args.genFlow) + hist_err_stat_ref = hh.unrolledHist(hist_err_stat_ref, binwnorm=binwnorm, add_flow_bins=args.genFlow) - # make plots if args.ylim is None: if logy: - ylim = (max(xx)/10000., 1000 * max(xx)) + ylim = (max(hist_err.values())/10000., 1000 * max(hist_err.values())) else: - miny = min(xx) - maxy = max(xx) - rangey = maxy-miny - ylim = (miny-rangey*0.1, maxy+rangey*0.7) + ylim = (0, 1.1 * max(hist_err.values())) else: ylim = args.ylim - xlabel = "-".join([get_xlabel(a, process_label) for a in axes]) - if len(axes) >= 2: - xlabel = xlabel.replace("(GeV)","") - xlabel += "Bin" - - fig, ax1 = plot_tools.figure(hist_ratio, xlabel, yLabel, ylim, logy=logy, width_scale=2) - - ax1.plot([min(edges), max(edges)], [0.,0.], color="black", linestyle="-") + rlabel=f"{args.refName}/{args.name}" + rrange = args.rrange + # make plots + fig, ax1, ax2 = plot_tools.figureWithRatio(hist_err, xlabel, yLabel, ylim, rlabel, rrange, logy=logy, width_scale=2) hep.histplot( - hist_ratio, + [hist_err, hist_err_ref], yerr=False, histtype="step", - color="black", - label="Total", + color=["black", "black"], + linestyle=["solid", "dashed"], + label=[f"Total ({args.name})", f"Total ({args.refName})",], ax=ax1, alpha=1., - zorder=2, + # zorder=2, ) - uncertainties = make_yields_df([hist_ratio], ["Total"], per_bin=True, yield_only=True) - - - if args.genFlow: - ax1.fill([0,18.5, 18.5, 0,0], [ylim[0],*ylim,ylim[1],ylim[0]], color="grey", alpha=0.3) - ax1.fill([len(errors)-17.5, len(errors)+0.5, len(errors)+0.5, len(errors)-17.5, len(errors)-17.5], [ylim[0],*ylim,ylim[1],ylim[0]], color="grey", alpha=0.3) - - if grouping: - sources = ["err_"+g for g in grouping] - else: - sources =["err_stat"] - sources += list(sorted([s for s in filter(lambda x: x.startswith("err"), df.keys()) - if s.replace("err_","") not in ["stat", "total"] ])) # total and stat are added first - - NUM_COLORS = len(sources)-1 - cm = mpl.colormaps["gist_rainbow"] - # add each source of uncertainty - i=0 - for source in sources: - - name = source.replace("err_","") - - name = translate_label.get(name,name) - - if source =="err_stat": - color = "grey" - else: - color = cm(1.*i/NUM_COLORS) - i += 1 - - if i%3 == 0: - linestype = "-" - elif i%3 == 1: - linestype = "--" - else: - linestype = ":" - - hist_unc = hist.Hist(hist.axis.Variable(edges, underflow=False, overflow=False)) - - if source not in df: - logger.warning(f"Source {source} not found in dataframe") - continue - - errors = df[source].values/bin_widths - errors_ref = df_ref[source].values/bin_widths - if relative_uncertainty: - errors /= values - errors_ref /= values_ref - - logger.debug(f"Plot source {source}") + hep.histplot( + hh.divideHists(hist_err_ref, hist_err, cutoff=0, rel_unc=True), + yerr=False, + histtype="step", + color="black", + ax=ax2, + # zorder=2, + ) - hist_unc.view(flow=False)[...] 
= errors_ref/errors-1 + uncertainties = make_yields_df([hist_err], ["Total", ], per_bin=True, yield_only=True, percentage=percentage) + if hist_stat is not None: hep.histplot( - hist_unc, + [hist_err_stat, hist_err_stat_ref], yerr=False, histtype="step", - color=color, - linestyle=linestype, - label=name, + color=["grey", "grey"], + linestyle=["solid", "dashed"], + label=[f"{translate_label.get('stat', 'stat')} ({args.name})", f"{translate_label.get('stat', 'stat')} ({args.refName})"], ax=ax1, alpha=1., - zorder=2, + # zorder=2, ) + hep.histplot( + hh.divideHists(hist_err_stat_ref, hist_err_stat, cutoff=0, rel_unc=True), + yerr=False, + histtype="step", + color="grey", + ax=ax2, + # zorder=2, + ) - unc_df = make_yields_df([hist_unc], [name], per_bin=True, yield_only=True) - uncertainties[name] = unc_df[name] + uncertainties["stat"] = make_yields_df([hist_err_stat], ["stat"], per_bin=True, yield_only=True, percentage=percentage)["stat"] + + + edges = hist_err.axes.edges[0] + ax2.plot([min(edges), max(edges)], [1,1], color="black", linestyle="-") + if hist_syst is not None: + syst_labels = [x for x in hist_err_syst.axes["syst"] if (grouping is None or x in grouping) and x not in ["nominal", "stat", "total"]] + NUM_COLORS = len(syst_labels) - 2 + cm = mpl.colormaps["gist_rainbow"] + # add each systematic uncertainty + i=0 + for syst in syst_labels[::-1]: + if len(axes_names) > 1: + hist_err_syst_i = hh.unrolledHist(hist_err_syst[{"syst": syst}], binwnorm=binwnorm, add_flow_bins=args.genFlow) + else: + hist_err_syst_i = hist_err_syst[{"syst": syst}] + + if max(hist_err_syst_i.values()) < error_threshold: + logger.debug(f"Systematic {syst} smaller than threshold of {error_threshold}") + continue + name = translate_label.get(syst,syst) + logger.debug(f"Plot systematic {syst} = {name}") + + if syst =="err_stat": + color = "grey" + else: + color = cm(1.*i/NUM_COLORS) + i += 1 + + if i%3 == 0: + linestype = "-" + elif i%3 == 1: + linestype = "--" + else: + linestype = ":" + + hep.histplot( + hist_err_syst_i, + yerr=False, + histtype="step", + color=color, + linestyle=linestype, + label=name, + ax=ax1, + alpha=1., + # zorder=1, + ) + + unc_df = make_yields_df([hist_err_syst_i], [name], per_bin=True, yield_only=True, percentage=True) + uncertainties[name] = unc_df[name] + + if args.genFlow: + ax1.fill([0,18.5, 18.5, 0,0], [ylim[0],*ylim,ylim[1],ylim[0]], color="grey", alpha=0.3) + ax1.fill([hist_err.size-17.5, hist_err.size+0.5, hist_err.size+0.5, hist_err.size-17.5, hist_err.size-17.5], [ylim[0],*ylim,ylim[1],ylim[0]], color="grey", alpha=0.3) scale = max(1, np.divide(*ax1.get_figure().get_size_inches())*0.3) - plot_tools.addLegend(ax1, ncols=4, text_size=15*args.scaleleg*scale) + plot_tools.addLegend(ax1, ncols=4, text_size=15*args.scaleleg*scale, loc="upper left") + plot_tools.fix_axes(ax1, ax2, yscale=args.yscale) if args.yscale: ymin, ymax = ax1.get_ylim() @@ -574,37 +630,30 @@ def plot_uncertainties_ratio(df, df_ref, poi_type, poi_type_ref, channel=None, e if not logy: plot_tools.redo_axis_ticks(ax1, "y") - plot_tools.redo_axis_ticks(ax1, "x", no_labels=len(axes) >= 2) + plot_tools.redo_axis_ticks(ax1, "x", no_labels=len(axes_names) >= 2) hep.cms.label(ax=ax1, lumi=float(f"{lumi:.3g}") if lumi is not None else None, fontsize=20*args.scaleleg*scale, label=args.cmsDecor, data=not args.noData) - outfile = f"{input_subdir}_unfolded_uncertainties_ratio" - - if poi_type in ["mu","nois"]: - outfile += f"_{poi_type}" - if relative_uncertainty: - outfile += "_relative" - if normalize: - outfile += 
"_normalized" - if logy: - outfile += "_logy" - outfile += f"_{base_process}" - outfile += "_"+"_".join(axes) + outfile = f"diff_unfolded_{poi_type}{'_relative_' if relative_uncertainty else '_'}uncertainties" + outfile += f"_{proc}" + outfile += "_"+"_".join(axes_names) outfile += (f"_{channel}" if channel else "") + outfile += (f"_logy" if logy else "") + outfile += (f"_{args.postfix}" if args.postfix else "") + plot_tools.save_pdf_and_png(outdir, outfile) outfile += (f"_{args.postfix}" if args.postfix else "") plot_tools.save_pdf_and_png(outdir, outfile) - plot_tools.write_index_and_log(outdir, outfile, nround=4 if normalize else 2, - yield_tables={"Unfolded data uncertainty [%]": uncertainties}, - analysis_meta_info={"AnalysisOutput" : groups.getMetaInfo(), 'CardmakerOutput': meta_info}, + plot_tools.write_index_and_log(outdir, outfile, nround=2, + yield_tables={f"Unfolded{' relative' if relative_uncertainty else ''} uncertainty{' [%]' if percentage else ''}": uncertainties}, + analysis_meta_info={args.infile : meta["meta_info"]}, args=args, ) plt.close() - for poi_type, poi_result in result.items(): for channel, channel_result in poi_result.items(): @@ -614,7 +663,7 @@ def plot_uncertainties_ratio(df, df_ref, poi_type, poi_type_ref, channel=None, e histo_others=[] if args.histfile: - groups_dict = {"W_qGen0": "Wminus", "W_qGen1": "Wplus", "Z": "Zmumu"} + groups_dict = {"W": "Wmunu", "W_qGen0": "Wminus", "W_qGen1": "Wplus", "Z": "Zmumu"} group_name = groups_dict[proc] for syst in args.varNames: groups.loadHistsForDatagroups(args.baseName, syst=syst, procsToRead=[group_name], nominalIfMissing=False) @@ -623,12 +672,23 @@ def plot_uncertainties_ratio(df, df_ref, poi_type, poi_type_ref, channel=None, e for hist_name in filter(lambda x: not any([x.endswith(y) for y in ["_stat","_syst"]]), proc_result.keys()): hist_nominal = result[poi_type][channel][proc][hist_name] hist_stat = result[poi_type][channel][proc][f"{hist_name}_stat"] - hist_ref = result_ref[poi_type][channel][proc][hist_name] if args.reference else None + # reference model + if args.reference: + poi_type_ref = poi_type if args.poiTypeReference is None else args.poiTypeReference + # hist_ref = result_ref[poi_type_ref][channel][proc][hist_name] + # hist_ref_stat = result_ref[poi_type_ref][channel][proc][f"{hist_name}_stat"] + + # copy to remove overflow + hist_ref = hist_nominal.copy() + hist_ref.view()[...] = result_ref[poi_type_ref][channel][proc][hist_name].view() + hist_ref_stat = hist_nominal.copy() + hist_ref_stat.view()[...] 
= result_ref[poi_type_ref][channel][proc][f"{hist_name}_stat"].view() + if args.selectAxis and args.selectEntries: histo_others = [h[{k: v}] for h, k, v in zip(histo_others, args.selectAxis, args.selectEntries)] axes = hist_nominal.axes - hists_others = [h.project(*axes.name) for h in histo_others] + hists_others = [hh.projectNoFlow(h, axes.name) for h in histo_others] selection_axes = [a for a in axes if a.name in args.selectionAxes] if len(selection_axes) > 0: @@ -639,28 +699,49 @@ def plot_uncertainties_ratio(df, df_ref, poi_type, poi_type_ref, channel=None, e iterator = [None] for bins in iterator: + h_ref = None + h_ref_stat = None if bins == None: suffix = channel h_nominal = hist_nominal h_stat = hist_stat - h_ref = hist_ref + if args.reference: + h_ref = hist_ref + h_ref_stat = hist_ref_stat h_others = hists_others else: idxs = {a.name: i for a, i in zip(selection_axes, bins) } if len(other_axes) == 0: continue logger.info(f"Make plot for axes {[a.name for a in other_axes]}, in bins {idxs}") - suffix = f"{channel}_" + "_".join([f"{a}{i}" for a, i in idxs.items()]) + suffix = channel + for a, i in idxs.items(): + if isinstance(hist_nominal.axes[a], hist.axis.Integer): + label = int(hist_nominal.axes[a].edges[i]) + if a == "helicitySig": + if i == 0: + label = "SigmaUL" + else: + label = f"Sigma{label}" + else: + label = f"{a}{label}" + else: + label = f"{a}{i}" + suffix += f"_{label}" h_nominal = hist_nominal[idxs] h_stat = hist_stat[idxs] - h_ref = h_ref[idxs] if args.reference else None + if args.reference: + h_ref = hist_ref[idxs] + h_ref_stat = hist_ref_stat[idxs] h_others = [h[idxs] for h in hists_others] if "xsec" in args.plots: plot_xsec_unfolded(h_nominal, h_stat, h_ref, poi_type=poi_type, channel=suffix, proc=proc, lumi=lumi, hist_others=h_others, - label_others=args.varLabels, - color_others=args.colors + label_others=args.varLabels, + marker_others=args.varMarkers, + color_others=args.colors, + pulls=args.pulls ) if "uncertainties" in args.plots: @@ -669,13 +750,25 @@ def plot_uncertainties_ratio(df, df_ref, poi_type, poi_type_ref, channel=None, e h_systs = h_systs[{**idxs, "syst": slice(None)}] plot_uncertainties_unfolded(h_nominal, h_stat, h_systs, poi_type=poi_type, channel=suffix, proc = proc, lumi=lumi, - relative_uncertainty=False,#True, + relative_uncertainty=True, # normalize=args.normalize, relative_uncertainty=not args.absolute, logy=args.logy) - # if "ratio" in args.plots: - # plot_uncertainties_ratio(data_c, data_c_ref, poi_type, poi_type_ref, edges=edges, channel=channel, scale=scale, - # normalize=args.normalize, relative_uncertainty=not args.absolute, logy=args.logy, process_label = process_label, axes=channel_axes) + if "ratio" in args.plots: + poi_type_ref = poi_type if args.poiTypeReference is None else args.poiTypeReference + h_ref_systs = result_ref[poi_type_ref][channel][proc][f"{hist_name}_syst"] + if bins != None: + h_ref_systs = h_ref_systs[{**idxs, "syst": slice(None)}] + + plot_uncertainties_with_ratio(h_nominal, h_ref, + poi_type=poi_type, poi_type_ref=poi_type_ref, + hist_stat=h_stat, hist_stat_ref=h_ref_stat, + # hist_syst=h_syst, hist_syst_ref=h_ref_syst, + channel=channel, + # normalize=args.normalize, relative_uncertainty=not args.absolute, + logy=args.logy, + # process_label = process_label, axes=channel_axes + ) if output_tools.is_eosuser_path(args.outpath) and args.eoscp: output_tools.copy_to_eos(outdir, args.outpath, args.outfolder) diff --git a/scripts/utilities/fitresult_pois_to_hist.py b/scripts/utilities/fitresult_pois_to_hist.py 
index e954e3fed..e3b613a5e 100644
--- a/scripts/utilities/fitresult_pois_to_hist.py
+++ b/scripts/utilities/fitresult_pois_to_hist.py
@@ -21,7 +21,7 @@
     raise IOError(f"Result from expected or observed fit must be specified with '--observed' or '--expected'")
 
 result = {}
 meta = None
-
+meta_exp = None
 if args.observed:
     result, meta = fitresult_pois_to_hist(args.observed.replace(".root",".hdf5"), result, uncertainties=None)
 if args.expected:
diff --git a/utilities/common.py b/utilities/common.py
index 37056a391..8a99ed5ea 100644
--- a/utilities/common.py
+++ b/utilities/common.py
@@ -238,11 +238,11 @@ def set_subparsers(subparser, name, analysis_label):
     if analysis_label not in axmap:
         raise ValueError(f"Unknown analysis {analysis_label}!")
     subparser.add_argument("--genAxes", type=str, nargs="+",
-        default=axmap[analysis_label], choices=["qGen", "ptGen", "absEtaGen", "ptVGen", "absYVGen"],
+        default=axmap[analysis_label], choices=["qGen", "ptGen", "absEtaGen", "ptVGen", "absYVGen", "helicitySig"],
         help="Generator level variable")
     subparser.add_argument("--genLevel", type=str, default='postFSR', choices=["preFSR", "postFSR"],
         help="Generator level definition for unfolding")
-    subparser.add_argument("--genBins", type=int, nargs="+", default=[18, 0] if "wlike" in analysis_label[0] else [16, 0],
+    subparser.add_argument("--genBins", type=int, nargs="+", default=[18, 0] if "wlike" in analysis_label else [16, 0],
         help="Number of generator level bins")
     subparser.add_argument("--inclusive", action='store_true', help="No fiducial selection (mass window only)")
 elif "theoryAgnostic" in name:
diff --git a/utilities/io_tools/conversion_tools.py b/utilities/io_tools/conversion_tools.py
index c6ee36270..412cbffe9 100644
--- a/utilities/io_tools/conversion_tools.py
+++ b/utilities/io_tools/conversion_tools.py
@@ -10,23 +10,45 @@
 
 logger = logging.child_logger(__name__)
 
-def fitresult_pois_to_hist(infile, result=None, poi_types = ["mu", "pmaskedexp", "pmaskedexpnorm", "sumpois", "sumpoisnorm", ], translate_poi_types=True,
-    merge_channels=True, grouped=True, uncertainties=None, expected=False,
-):
-    # convert POIs in fitresult into histograms
-    # uncertainties, use None to get all, use [] to get none
-    # grouped=True for grouped uncertainties
-    # Different channels can have different year, flavor final state, particle final state, sqrt(s),
-    # if merge_channels=True the lumi is added up for final states with different flavors or eras with same sqrt(s)
-
-    # translate the name of the keys to be written out
-    target_keys={
-        "pmaskedexp": "xsec",
-        "sumpois": "xsec",
-        "pmaskedexpnorm": "xsec_normalized",
-        "sumpoisnorm": "xsec_normalized",
-    }
-
+def transform_poi(poi_type, meta):
+    # scale the poi; in the case of "nois", convert the NOI value into a poi-like form
+    if poi_type in ["nois"]:
+        prior_norm = meta["args"]["priorNormXsec"]
+        scale_norm = meta["args"]["scaleNormXsecHistYields"] if meta["args"]["scaleNormXsecHistYields"] is not None else 1
+        val = lambda theta, scale, k=prior_norm, x=scale_norm: (1+x)**(k * theta) * scale
+        err = lambda theta, err, scale, k=prior_norm, x=scale_norm: (k*np.log(1+x) * (1+x)**(k * theta)) * scale * err
+
+        # val = lambda theta, scale, k=prior_norm, x=scale_norm: (1 + x * k * theta) * scale
+        # err = lambda theta, scale, k=prior_norm, x=scale_norm: (x * k * theta) * scale
+        return val, err
+    else:
+        val = lambda poi, scale: poi * scale
+        err = lambda poi, err, scale: err * scale
+        return val, err
+
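+# Worked example for the "nois" transform above (illustrative numbers): with
+# priorNormXsec k = 1 and scaleNormXsecHistYields x = 0.1, a fitted theta = 1
+# maps to (1+0.1)**(1*1) = 1.1 times the nominal scale, and the uncertainty
+# propagates with the derivative d/dtheta (1+x)**(k*theta) = k*ln(1+x)*(1+x)**(k*theta).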
+def expand_flow(val, axes, flow_axes, var=None):
+    # pad val and var with ones along axes not listed in flow_axes, so that
+    # they broadcast into the histogram view including the flow bins
+    for a in axes:
+        if a.name not in flow_axes:
+            if a.traits.underflow:
+                s_ = [s for s in val.shape]
+                s_[axes.index(a)] = 1
+                val = np.append(np.ones(s_), val, axis=axes.index(a))
+                if var is not None:
+                    var = np.append(np.ones(s_), var, axis=axes.index(a))
+            if a.traits.overflow:
+                s_ = [s for s in val.shape]
+                s_[axes.index(a)] = 1
+                val = np.append(val, np.ones(s_), axis=axes.index(a))
+                if var is not None:
+                    var = np.append(var, np.ones(s_), axis=axes.index(a))
+    if var is None:
+        return val
+    else:
+        return val, var
+
+def combine_channels(meta, merge_gen_charge_W):
+    # merge channels with same energy
     channel_energy={
         "2017G": "5TeV",
         "2017H": "13TeV",
@@ -35,6 +57,7 @@
         "2017": "13TeV",
         "2018": "13TeV",
     }
+    # merge electron and muon into lepton channels
     channel_flavor ={
         "e": "l",
         "mu": "l",
@@ -42,33 +65,74 @@
         "mumu": "ll",
     }
 
+    channel_info = {}
+    for chan, info in meta["channel_info"].items():
+        if chan.endswith("masked"):
+            continue
+        channel = f"chan_{channel_energy[info['era']]}"
+        if info['flavor'] in channel_flavor:
+            channel += f"_{channel_flavor[info['flavor']]}"
+
+        logger.debug(f"Merge channel {chan} into {channel}")
+
+        lumi = info["lumi"]
+        gen_axes = info["gen_axes"]
+
+        if merge_gen_charge_W:
+            if "W_qGen0" not in gen_axes or "W_qGen1" not in gen_axes:
+                logger.debug("Can't merge W, it requires W_qGen0 and W_qGen1 as separate processes")
+            elif gen_axes["W_qGen0"] != gen_axes["W_qGen1"]:
+                raise RuntimeError("Axes for different gen charges are different, gen charges can't be merged")
+            else:
+                logger.debug("Merge W_qGen0 and W_qGen1 into W with charge axis")
+                axis_qGen = hist.axis.Regular(2, -2., 2., underflow=False, overflow=False, name = "qGen")
+                gen_axes = {
+                    "W": [*gen_axes["W_qGen0"], axis_qGen],
+                    **{k:v for k,v in gen_axes.items() if k not in ["W_qGen0", "W_qGen1"]}
+                }
+                # gen_axes["W"] = [*gen_axes["W_qGen0"], axis_qGen]
+
+        if channel not in channel_info:
+            channel_info[channel] = {
+                "gen_axes": gen_axes,
+                "lumi": lumi,
+            }
+        elif gen_axes == channel_info[channel]["gen_axes"]:
+            channel_info[channel]["lumi"] += lumi
+        else:
+            channel_info[channel]["gen_axes"].update(gen_axes)
+
+    return channel_info
+
+def fitresult_pois_to_hist(infile, result=None, poi_types = None, translate_poi_types=True,
+    merge_channels=True, grouped=True, uncertainties=None, expected=False, merge_gen_charge_W=True,
+):
+    # convert POIs in fitresult into histograms
+    # uncertainties, use None to get all, use [] to get none
+    # grouped=True for grouped uncertainties
+    # Different channels can have different year, flavor final state, particle final state, sqrt(s),
+    # if merge_channels=True the lumi is added up for final states with different flavors or eras with same sqrt(s)
+
+    # translate the name of the keys to be written out
+    target_keys={
+        "pmaskedexp": "xsec",
+        "sumpois": "xsec",
+        "pmaskedexpnorm": "xsec_normalized",
+        "sumpoisnorm": "xsec_normalized",
+    }
+
     fitresult = combinetf_input.get_fitresult(infile.replace(".root",".hdf5"))
     meta = ioutils.pickle_load_h5py(fitresult["meta"])
     meta_info = meta["meta_info"]
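+    # Illustrative usage (hypothetical paths and keys, assuming a W-like fit):
+    #   result, meta = fitresult_pois_to_hist("fitresult.hdf5", poi_types=["pmaskedexp"])
+    #   h = result["xsec"]["chan_13TeV_l"]["W"]["hist_qGen_ptGen_absEtaGen"]
+    #   h.values(), h.variances()  # cross sections and squared total uncertainties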
"sumpoisnorm", "ratiometapois"] + if merge_channels: - channel_info = {} - for chan, info in meta["channel_info"].items(): - if chan.endswith("masked"): - continue - channel = f"chan_{channel_energy[info['era']]}" - if info['flavor'] in channel_flavor: - channel += f"_{channel_flavor[info['flavor']]}" - - logger.debug(f"Merge channel {chan} into {channel}") - - lumi = info["lumi"] - gen_axes = info["gen_axes"] - - if channel not in channel_info: - channel_info[channel] = { - "gen_axes": gen_axes, - "lumi": lumi, - } - else: - if gen_axes != channel_info[channel]["gen_axes"]: - raise RuntimeError(f"The gen axes are different among channels {channel_info}, so they can't be merged") - channel_info[channel]["lumi"] += lumi + channel_info = combine_channels(meta, merge_gen_charge_W) else: channel_info = meta["channel_info"] @@ -78,13 +142,14 @@ def fitresult_pois_to_hist(infile, result=None, poi_types = ["mu", "pmaskedexp", logger.debug(f"Now at POI type {poi_type}") df = combinetf_input.read_impacts_pois(fitresult, poi_type=poi_type, group=grouped, uncertainties=uncertainties) - if df is None: + if df is None or len(df)==0: logger.warning(f"POI type {poi_type} not found in histogram, continue with next one") continue - scale = 1 - if poi_type in ["nois"]: - scale = 1./(imeta["args"]["scaleNormXsecHistYields"]*imeta["args"]["priorNormXsec"]) + # find all axes where the flow bins are included in the unfolding, needed for correct reshaping + flow_axes = list(set([l for i in df["Name"].apply(lambda x: [i[:-1] for i in x.split("_")[1:-1] if len(i) and i[-1] in ["U","O"]]).values if i for l in i])) + + action_val, action_err = transform_poi(poi_type, meta_info) poi_key = target_keys.get(poi_type, poi_type) if translate_poi_types else poi_type if poi_key not in result: @@ -92,17 +157,18 @@ def fitresult_pois_to_hist(infile, result=None, poi_types = ["mu", "pmaskedexp", for channel, info in channel_info.items(): logger.debug(f"Now at channel {channel}") - channel_scale = scale if poi_type in ["pmaskedexp", "sumpois"]: - channel_scale = info["lumi"]*1000 + channel_scale = 1./(info["lumi"]*1000) + else: + channel_scale = 1 if channel not in result[poi_key]: result[poi_key][channel] = {} for proc, gen_axes_proc in info["gen_axes"].items(): logger.debug(f"Now at proc {proc}") - if poi_type.startswith("sum"): - if len(gen_axes_proc)==1: - logger.info(f"Skip POI type {poi_type} since there is only one gen axis") + if any(poi_type.startswith(x) for x in ["sum", "ratio", "chargemeta", "helmeta"]): + if len(gen_axes_proc)<=1: + logger.info(f"Skip POI type {poi_type} since there is only {len(gen_axes_proc)} gen axis") continue # make all possible lower dimensional gen axes combinations; wmass only combinations including qGen gen_axes_permutations = [list(k) for n in range(1, len(gen_axes_proc)) for k in itertools.combinations(gen_axes_proc, n)] @@ -111,30 +177,62 @@ def fitresult_pois_to_hist(infile, result=None, poi_types = ["mu", "pmaskedexp", if proc not in result[poi_key][channel]: result[poi_key][channel][proc] = {} + for axes in gen_axes_permutations: - shape = [a.extent for a in axes] + logger.debug(f"Now at axes {axes}") + + if poi_type in ["helpois", "helmetapois"]: + if "helicitySig" not in [a.name for a in axes]: + continue + # replace helicity cross section axis by angular coefficient axis + axis_coeffs = hist.axis.Integer(0, 8, name="A", overflow=False, underflow=False) + axes = [axis_coeffs if a.name == "helicitySig" else a for a in axes] + axes_names = [a.name for a in axes] + shape = [a.extent 
if a.name in flow_axes else a.size for a in axes] + + # TODO: clean up hard coded treatment for cross section ratios + if poi_type == "ratiometapois": + if proc not in ["W_qGen0", "r_qGen_W"]: + continue - data = combinetf_input.select_pois(df, axes_names, base_processes=proc, flow=True) + proc = "r_qGen_W" + if proc not in result[poi_key][channel]: + result[poi_key][channel][proc] = {} + + data = combinetf_input.select_pois(df, axes_names, base_processes=proc, flow=True) + data = data.loc[data["Name"].apply(lambda x: not x.endswith("totalxsec"))] + else: + data = combinetf_input.select_pois(df, axes_names, base_processes=proc, flow=True) + + for u in filter(lambda x: x.startswith("err_"), data.keys()): + data.loc[:,u] = action_err(data["value"], data[u], channel_scale) + data.loc[:,"value"] = action_val(data["value"], channel_scale) + if len(data['value']) <= 0: + logger.debug(f"No values found for the hist in poi {poi_key}, continue with next one") + continue logger.debug(f"The values for the hist in poi {poi_key} are {data['value'].values}") - values = np.reshape(data["value"].values/channel_scale, shape) - variances = np.reshape( (data["err_total"].values/channel_scale)**2, shape) + values = np.reshape(data["value"].values, shape) + variances = np.reshape(data["err_total"].values**2, shape) + values, variances = expand_flow(values, axes, flow_axes, variances) + # save nominal histogram with total uncertainty h_ = hist.Hist(*axes, storage=hist.storage.Weight()) h_.view(flow=True)[...] = np.stack([values, variances], axis=-1) hist_name = "hist_" + "_".join(axes_names) if expected: hist_name+= "_expected" - logger.info(f"Save histogram {hist_name}") + logger.info(f"Save histogram {hist_name} for proc {proc} in channel {channel} and type {poi_key}") if hist_name in result[poi_key][channel][proc]: logger.warning(f"Histogram {hist_name} already in result, it will be overridden") result[poi_key][channel][proc][hist_name] = h_ + # save nominal histogram with stat uncertainty if "err_stat" in data.keys(): - # save stat only hist - variances = np.reshape( (data["err_stat"].values/channel_scale)**2, shape) + variances = np.reshape(data["err_stat"].values, shape)**2 + variances = expand_flow(variances, axes, flow_axes) h_stat = hist.Hist(*axes, storage=hist.storage.Weight()) h_stat.view(flow=True)[...] = np.stack([values, variances], axis=-1) hist_name_stat = f"{hist_name}_stat" @@ -145,8 +243,13 @@ def fitresult_pois_to_hist(infile, result=None, poi_types = ["mu", "pmaskedexp", # save other systematic uncertainties as separately varied histograms labels = [u.replace("err_","") for u in filter(lambda x: x.startswith("err_") and x not in ["err_total", "err_stat"], data.keys())] if labels: - # string category always has an overflow bin, we set it to 0 - systs = np.stack([values, *[values + np.reshape(data[f"err_{u}"].values/channel_scale, shape) for u in labels], np.zeros_like(values)], axis=-1) + errs = [] + for u in labels: + err = np.reshape(data[f"err_{u}"].values, shape) + err = expand_flow(err, axes, flow_axes) + errs.append(values + err) + systs = np.stack([values, *errs], axis=-1) + systs = np.append(systs, np.zeros_like(values)[...,None], axis=-1) # overflow axis can't be disabled in StrCategory h_syst = hist.Hist(*axes, hist.axis.StrCategory(["nominal", *labels], name="syst"), storage=hist.storage.Double()) h_syst.values(flow=True)[...] 
= systs hist_name_syst = f"{hist_name}_syst" diff --git a/utilities/styles/styles.py b/utilities/styles/styles.py index ee19e73c2..869663f90 100644 --- a/utilities/styles/styles.py +++ b/utilities/styles/styles.py @@ -108,12 +108,15 @@ "Total", "stat", "binByBinStat", + "statMC", "luminosity", "recoil", "CMS_background", "theory_ew", "normXsecW", - "width" + "width", + "ZmassAndWidth", + "massAndWidth" ] nuisance_groupings = { "super":[ @@ -125,7 +128,6 @@ "muonCalibration", ], "max": common_groups + [ - "massShift", "QCDscale", "pdfCT18Z", "resum", @@ -149,10 +151,14 @@ ], "unfolding_max": [ "Total", + "stat", + "binByBinStat", + "experiment", "QCDscale", "pdfCT18Z", "resum", "theory_ew", + "bcQuarkMass", ], "unfolding_min": [ "Total", @@ -180,6 +186,9 @@ "sumpois": "d$\sigma$ [pb]", "pmaskedexpnorm": "1/$\sigma$ d$\sigma$", "sumpoisnorm": "1/$\sigma$ d$\sigma$", + "ratiometapois": "$\sigma(W^{+})/\sigma(W^{-})$", + "helpois": "Ai", + "helmetapois": "Ai", } axis_labels = { @@ -270,6 +279,15 @@ def get_systematics_label(key, idx=0): if key in systematics_labels_idxs: return systematics_labels_idxs[key][idx] + if "helicity" in key.split("_")[-1]: + idx =int(key.split("_")[-1][-1]) + if idx == 0: + label = "UL" + else: + label = str(idx-1) + + return f"$\pm\sigma_\mathrm{{{label}}}$" + # default return key logger.info(f"No label found for {key}") return key diff --git a/wremnants/HDF5Writer.py b/wremnants/HDF5Writer.py index d75bc297f..e49606ed4 100644 --- a/wremnants/HDF5Writer.py +++ b/wremnants/HDF5Writer.py @@ -20,7 +20,6 @@ class HDF5Writer(object): # keeps multiple card tools and writes them out in a single file to fit (appending the histograms) def __init__(self, card_name="card", sparse=False): self.cardName = card_name - self.cardTools = [] # settings for writing out hdf5 files self.dtype="float64" self.chunkSize=4*1024**2 @@ -32,9 +31,12 @@ def __init__(self, card_name="card", sparse=False): self.theoryFitMCStat = True # Whether or not to include the MC stat uncertainty in the thoery fit (in the covariance matrix) self.dict_noigroups = defaultdict(lambda: set()) + self.dict_noigroups_masked = defaultdict(lambda: set()) self.dict_systgroups = defaultdict(lambda: set()) self.systsstandard = set() + self.systsnoi = set() + self.systsnoimasked = set() self.systsnoconstraint = set() self.systsnoprofile = set() @@ -91,7 +93,7 @@ def set_fitresult(self, fitresult_filename, poi_type="pmaskedexp", gen_flow=Fals logger.warning("Theoryfit for more than one channels is currently experimental") self.theoryFit = True self.theoryFitMCStat = mc_stat - base_processes = ["W" if c.datagroups.mode[0] == "w" else "Z" for c in self.get_channels().values()] + base_processes = ["W" if c.datagroups.mode == "w_mass" else "Z" for c in self.get_channels().values()] axes = [c.fit_axes for c in self.get_channels().values()] fitresult = combinetf_input.get_fitresult(fitresult_filename) data, self.theoryFitDataCov = combinetf_input.get_theoryfit_data(fitresult, axes=axes, base_processes=base_processes, poi_type=poi_type, flow=gen_flow) @@ -122,7 +124,7 @@ def get_backgrounds(self): bkgs.update([p for p in chanInfo.predictedProcesses() if p not in chanInfo.unconstrainedProcesses]) return list(common.natural_sort(bkgs)) - def get_flat_values(self, h, chanInfo, axes, return_variances=True): + def get_flat_values(self, h, chanInfo, axes, return_variances=True, flow=False): # check if variances are available if return_variances and (h.storage_type != hist.storage.Weight): raise RuntimeError(f"Sumw2 not filled for {h} 
but needed for binByBin uncertainties") @@ -133,11 +135,11 @@ def get_flat_values(self, h, chanInfo, axes, return_variances=True): h = h.project(*axes) if return_variances: - val = h.values(flow=False).flatten().astype(self.dtype) - var = h.variances(flow=False).flatten().astype(self.dtype) + val = h.values(flow=flow).flatten().astype(self.dtype) + var = h.variances(flow=flow).flatten().astype(self.dtype) return val, var else: - return h.values(flow=False).flatten().astype(self.dtype) + return h.values(flow=flow).flatten().astype(self.dtype) def write(self, args, @@ -168,8 +170,8 @@ def write(self, dg = chanInfo.datagroups if masked: self.masked_channels.append(chan) - axes = ["count"] - nbinschan = 1 + axes = chanInfo.fit_axes[:] + nbinschan = 1 if len(axes) == 1 and axes[0] == "count" else None else: axes = chanInfo.fit_axes[:] nbinschan = None @@ -239,6 +241,11 @@ def write(self, # release memory del chanInfo.pseudodata_datagroups + for proc in chanInfo.datagroups.groups: + for pseudo in chanInfo.pseudoData: + if pseudo in dg.groups[proc].hists: + logger.debug(f"Delete pseudodata histogram {pseudo}") + del dg.groups[proc].hists[pseudo] # nominal predictions (after pseudodata because some pseudodata changes the nominal model) for proc in procs_chan: @@ -249,12 +256,16 @@ def write(self, if not masked: norm_proc, sumw2_proc = self.get_flat_values(norm_proc_hist, chanInfo, axes) + if proc in chanInfo.noStatUncProcesses: + logger.info(f"Skip sumw2 for proc {proc}") + sumw2_proc = 0 else: norm_proc = self.get_flat_values(norm_proc_hist, chanInfo, axes, return_variances=False) if nbinschan is None: nbinschan = norm_proc.shape[0] - nbins += nbinschan + if not masked: + nbins += nbinschan elif nbinschan != norm_proc.shape[0]: raise Exception(f"Mismatch between number of bins in channel {chan} and process {proc} for expected ({nbinschan}) and ({norm_proc.shape[0]})") @@ -275,9 +286,7 @@ def write(self, if not masked: # data - if self.theoryFit: - if self.theoryFitData is None or self.theoryFitDataCov is None: - raise RuntimeError("No data or covariance found to perform theory fit") + if self.theoryFit and self.theoryFitData is not None and self.theoryFitDataCov is not None: data_obs = self.theoryFitData[chan] elif chanInfo.real_data and dg.dataName in dg.groups: data_obs_hist = dg.groups[dg.dataName].hists[chanInfo.nominalName] @@ -345,7 +354,7 @@ def write(self, if asymmetric: self.book_logk_halfdiff(logkhalfdiff_proc, chan, proc, var_name) - self.book_systematic(syst, var_name) + self.book_systematic(syst, var_name, masked=masked) # shape systematics for systKey, syst in chanInfo.systematics.items(): @@ -443,7 +452,7 @@ def get_logk(histname, var_type=""): #special case, book the extra systematic self.book_logk_avg(logkdiffavg_proc, chan, proc, var_name_out_diff) - self.book_systematic(syst, var_name_out_diff) + self.book_systematic(syst, var_name_out_diff, masked=masked) else: logkup_proc = get_logk(var_name, "Up") logkdown_proc = -get_logk(var_name, "Down") @@ -457,7 +466,7 @@ def get_logk(histname, var_type=""): self.book_logk_halfdiff(logkhalfdiff_proc, chan, proc, var_name_out) self.book_logk_avg(logkavg_proc, chan, proc, var_name_out) - self.book_systematic(syst, var_name_out) + self.book_systematic(syst, var_name_out, masked=masked) # free memory for var in var_map.keys(): @@ -708,9 +717,10 @@ def get_logk(histname, var_type=""): systsnoprofile = self.get_systsnoprofile() systsnoconstraint = self.get_systsnoconstraint() - noigroups, noigroupidxs = self.get_noigroups() + maskednoigroups, 
maskednoigroupidxs = self.get_noigroups(masked=True) systgroups, systgroupidxs = self.get_systgroups() + poigroups = self.get_systsnoimasked() if hasattr(args, "poiAsNoi") and args.poiAsNoi else procs sumgroups, sumgroupsegmentids, sumgroupidxs = self.get_sumgroups(procs) chargegroups, chargegroupidxs = self.get_chargegroups() polgroups, polgroupidxs = self.get_polgroups() @@ -736,20 +746,20 @@ def create_dataset(name, content, length=None, dtype=h5py.special_dtype(vlen=str create_dataset("systgroups", systgroups) create_dataset("systgroupidxs", systgroupidxs, dtype=h5py.special_dtype(vlen=np.dtype('int32'))) create_dataset("chargegroups", chargegroups) - create_dataset("chargegroupidxs", chargegroups, 2, dtype='int32') + create_dataset("chargegroupidxs", chargegroupidxs, 2, dtype='int32') create_dataset("polgroups", polgroups) - create_dataset("polgroupidxs", polgroups, 3, dtype='int32') + create_dataset("polgroupidxs", polgroupidxs, 3, dtype='int32') create_dataset("helgroups", helgroups) - create_dataset("helgroupidxs", helgroups, 6, dtype='int32') + create_dataset("helgroupidxs", helgroupidxs, 6, dtype='int32') create_dataset("sumgroups", sumgroups) create_dataset("sumgroupsegmentids", sumgroupsegmentids, dtype='int32') create_dataset("sumgroupidxs", sumgroupidxs, dtype='int32') create_dataset("chargemetagroups", chargemetagroups) - create_dataset("chargemetagroupidxs", chargemetagroups, 2, dtype='int32') + create_dataset("chargemetagroupidxs", chargemetagroupidxs, 2, dtype='int32') create_dataset("ratiometagroups", ratiometagroups) - create_dataset("ratiometagroupidxs", ratiometagroups, 2, dtype='int32') + create_dataset("ratiometagroupidxs", ratiometagroupidxs, 2, dtype='int32') create_dataset("helmetagroups", helmetagroups) - create_dataset("helmetagroupidxs", helmetagroups, 6, dtype='int32') + create_dataset("helmetagroupidxs", helmetagroupidxs, 6, dtype='int32') create_dataset("reggroups", reggroups) create_dataset("reggroupidxs", reggroupidxs, dtype=h5py.special_dtype(vlen=np.dtype('int32'))) create_dataset("poly1dreggroups", poly1dreggroups) @@ -767,6 +777,7 @@ def create_dataset(name, content, length=None, dtype=h5py.special_dtype(vlen=str create_dataset("noigroups", noigroups) create_dataset("noigroupidxs", noigroupidxs, dtype='int32') create_dataset("maskedchans", self.masked_channels) + create_dataset("maskednoigroupidxs", maskednoigroupidxs, dtype='int32') create_dataset("pseudodatanames", pseudoDataNames) #create h5py datasets with optimized chunk shapes @@ -782,7 +793,7 @@ def create_dataset(name, content, length=None, dtype=h5py.special_dtype(vlen=str nbytes += writeFlatInChunks(pseudodata, f, "hpseudodata", maxChunkBytes = self.chunkSize) pseudodata = None - if self.theoryFit: + if self.theoryFitDataCov is not None: data_cov = self.theoryFitDataCov if data_cov.shape != (nbins,nbins): raise RuntimeError(f"covariance matrix has incompatible shape of {data_cov.shape}, expected is {(nbins,nbins)}!") @@ -827,11 +838,15 @@ def book_logk(self, dict_logk, dict_logk_indices, dict_logk_values, logk, chan, else: dict_logk[chan][proc][syst_name] = logk - def book_systematic(self, syst, name): + def book_systematic(self, syst, name, masked=False): logger.debug(f"book systematic {name}") if syst.get('noProfile', False): self.systsnoprofile.add(name) - elif syst.get("noConstraint", False) or syst.get("noi", False): + elif syst.get("noi", False): + self.systsnoi.add(name) + if masked: + self.systsnoimasked.add(name) + elif syst.get("noConstraint", False): 
self.systsnoconstraint.add(name)
         else:
             self.systsstandard.add(name)
 
@@ -845,6 +860,11 @@ def book_systematic(self, syst, name):
         split_groups = syst.get("splitGroup", {group: re.compile(".*")})
         matched_groups = [grp for grp, matchre in split_groups.items() if matchre.match(name)]
 
-        target_dict = self.dict_noigroups if syst.get("noi", False) else self.dict_systgroups
+        if syst.get("noi", False) and not masked:
+            target_dict = self.dict_noigroups
+        elif syst.get("noi", False):
+            target_dict = self.dict_noigroups_masked
+        else:
+            target_dict = self.dict_systgroups
 
         for matched_group in matched_groups:
@@ -856,12 +878,18 @@ def get_systsstandard(self):
     def get_systsnoprofile(self):
         return list(common.natural_sort(self.systsnoprofile))
 
+    def get_systsnoi(self):
+        return list(common.natural_sort(self.systsnoi))
+
     def get_systsnoconstraint(self):
-        return list(common.natural_sort(self.systsnoconstraint))
+        return self.get_systsnoi() + list(common.natural_sort(self.systsnoconstraint))
 
     def get_systs(self):
         return self.get_systsnoconstraint() + self.get_systsstandard() + self.get_systsnoprofile()
 
+    def get_systsnoimasked(self):
+        return list(common.natural_sort(self.systsnoimasked))
+
     def get_constraintweights(self, dtype):
         systs = self.get_systs()
         constraintweights = np.ones([len(systs)], dtype=dtype)
@@ -881,12 +909,13 @@ def get_groups(self, group_dict):
                 idxs.append(idx)
         return groups, idxs
 
-    def get_noigroups(self):
+    def get_noigroups(self, masked=False):
         #list of groups of systematics to be treated as additional outputs for impacts, etc (aka "nuisances of interest")
         systs = self.get_systs()
+        groupdict = self.dict_noigroups_masked if masked else self.dict_noigroups
         groups = []
         idxs = []
-        for group, members in common.natural_sort_dict(self.dict_noigroups).items():
+        for group, members in common.natural_sort_dict(groupdict).items():
             groups.append(group)
             for syst in members:
                 idxs.append(systs.index(syst))
diff --git a/wremnants/combine_helpers.py b/wremnants/combine_helpers.py
index f2152f1bb..8ce1395c4 100644
--- a/wremnants/combine_helpers.py
+++ b/wremnants/combine_helpers.py
@@ -1,10 +1,63 @@
-from utilities import common, logging
+from utilities import boostHistHelpers as hh, common, logging
 from utilities.io_tools import input_tools
+from wremnants import histselections
 import numpy as np
+import hist
+import re
 
 logger = logging.child_logger(__name__)
 
+
+def add_mass_diff_variations(cardTool, args, label, suffix, signal_samples_forMass, **kwargs):
+    mass_diff_args = kwargs.copy()
+    if args.fitMassDiff == "charge":
+        cardTool.addSystematic(**mass_diff_args,
+            # # on gen level based on the sample, only possible for mW
+            # preOpMap={m.name: (lambda h, swap=swap_bins: swap(h, "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown"))
+            #     for g in cardTool.procGroups[signal_samples_forMass[0]] for m in cardTool.datagroups.groups[g].members if "minus" in m.name},
+            # on reco level based on reco charge
+            preOpMap={m.name: (lambda h:
+                hh.swap_histogram_bins(h, "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown", "charge", 0)
+                ) for g in cardTool.procGroups[signal_samples_forMass[0]] for m in cardTool.datagroups.groups[g].members},
+        )
+    elif args.fitMassDiff == "cosThetaStarll":
+        cardTool.addSystematic(**mass_diff_args,
+            preOpMap={m.name: (lambda h:
+                hh.swap_histogram_bins(h, "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown", "cosThetaStarll", hist.tag.Slicer()[0:complex(0,0):])
+                ) for g in cardTool.procGroups[signal_samples_forMass[0]] for m in cardTool.datagroups.groups[g].members},
+        )
+    elif args.fitMassDiff == "eta-sign":
"eta-sign": + cardTool.addSystematic(**mass_diff_args, + preOpMap={m.name: (lambda h: + hh.swap_histogram_bins(h, "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown", "eta", hist.tag.Slicer()[0:complex(0,0):]) + ) for g in cardTool.procGroups[signal_samples_forMass[0]] for m in cardTool.datagroups.groups[g].members}, + ) + elif args.fitMassDiff == "eta-range": + cardTool.addSystematic(**mass_diff_args, + preOpMap={m.name: (lambda h: + hh.swap_histogram_bins(h, "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown", "eta", hist.tag.Slicer()[complex(0,-0.9):complex(0,0.9):]) + ) for g in cardTool.procGroups[signal_samples_forMass[0]] for m in cardTool.datagroups.groups[g].members}, + ) + elif args.fitMassDiff.startswith("etaRegion"): + # 3 bins, use 3 unconstrained parameters: mass; mass0 - mass2; mass0 + mass2 - mass1 + mass_diff_args["rename"] = f"massDiff1{suffix}{label}" + mass_diff_args["systNameReplace"] = [("Shift",f"Diff1{suffix}")] + cardTool.addSystematic(**mass_diff_args, + preOpMap={m.name: (lambda h: hh.swap_histogram_bins( + hh.swap_histogram_bins(h, "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown", args.fitMassDiff, 2), # invert for mass2 + "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown", args.fitMassDiff, 1, axis1_replace=f"massShift{label}0MeV") # set mass1 to nominal + ) for g in cardTool.procGroups[signal_samples_forMass[0]] for m in cardTool.datagroups.groups[g].members}, + ) + mass_diff_args["rename"] = f"massDiff2{suffix}{label}" + mass_diff_args["systNameReplace"] = [("Shift",f"Diff2{suffix}")] + cardTool.addSystematic(**mass_diff_args, + preOpMap={m.name: (lambda h: + hh.swap_histogram_bins(h, "massShift", f"massShift{label}50MeVUp", f"massShift{label}50MeVDown", args.fitMassDiff, 1) + ) for g in cardTool.procGroups[signal_samples_forMass[0]] for m in cardTool.datagroups.groups[g].members}, + ) + + def add_recoil_uncertainty(card_tool, samples, passSystToFakes=False, pu_type="highPU", flavor="", group_compact=True): met = input_tools.args_from_metadata(card_tool, "met") if flavor == "": @@ -32,6 +84,62 @@ def add_recoil_uncertainty(card_tool, samples, passSystToFakes=False, pu_type="h ) +def add_explicit_MCstat(cardTool, recovar, samples='signal_samples', wmass=False, source=None): + """ + add explicit bin by bin stat uncertainties + Parameters: + source (tuple of str): take variations from histogram with name given by f"{source[0]}_{source[1]}" (E.g. used to correlate between masked channels). 
+        If None, use variations from the nominal histogram.
+    fitresult: fitresult the fit is run against; if given, the variations are put in the "binByBinStat" group.
+    """
+    datagroups = cardTool.datagroups
+
+    recovar_syst = [f"_{n}" for n in recovar]
+    info = dict(
+        baseName="MCstat_"+"_".join(cardTool.procGroups[samples])+"_",
+        rename="statMC",
+        group="statMC",
+        passToFakes=False,
+        processes=[samples],
+        mirror=True,
+        labelsByAxis=[f"_{p}" if p != recovar[0] else p for p in recovar],
+    )
+    cardTool.setProcsNoStatUnc(cardTool.procGroups[samples])
+    if source is not None:
+        # signal region selection
+        if wmass:
+            action_sel = lambda h, x: histselections.SignalSelectorABCD(h[x]).get_hist(h[x])
+        else:
+            action_sel = lambda h, x: h[x]
+
+        integration_var = {a: hist.sum for a in datagroups.gen_axes_names} # integrate out gen axes for bin by bin uncertainties
+        cardTool.addSystematic(**info,
+            nominalName=source[0],
+            name=source[1],
+            systAxes=recovar,
+            actionRequiresNomi=True,
+            action=lambda hv, hn:
+                hh.addHists(
+                    hn[{"count":hist.sum, "acceptance":hist.sum}].project(*datagroups.gen_axes_names),
+                    action_sel(hv, {"acceptance":True}).project(*recovar, *datagroups.gen_axes_names),
+                    scale2=(
+                        np.sqrt(action_sel(hv, {"acceptance":hist.sum, **integration_var}).variances(flow=True))
+                        / action_sel(hv, {"acceptance":hist.sum, **integration_var}).values(flow=True)
+                    )[...,*[np.newaxis] * len(datagroups.gen_axes_names)]
+                )
+        )
+    else:
+        if fitresult:
+            info["group"] = "binByBinStat"
+        cardTool.addSystematic(**info,
+            name=cardTool.nominalName,
+            systAxes=recovar_syst,
+            action=lambda h:
+                hh.addHists(h.project(*recovar),
+                    hh.expand_hist_by_duplicate_axes(h.project(*recovar), recovar, recovar_syst),
+                    scale2=np.sqrt(h.project(*recovar).variances(flow=True))/h.project(*recovar).values(flow=True))
+        )
+
+
 def add_electroweak_uncertainty(card_tool, ewUncs, flavor="mu", samples="single_v_samples", passSystToFakes=True, wlike=False):
     info = dict(
         systAxes=["systIdx"],
diff --git a/wremnants/theoryAgnostic_tools.py b/wremnants/theoryAgnostic_tools.py
index 3f56f35dc..657ffda69 100644
--- a/wremnants/theoryAgnostic_tools.py
+++ b/wremnants/theoryAgnostic_tools.py
@@ -25,9 +25,9 @@ def select_fiducial_space(df, ptVgenMax, absYVgenMax, accept=True, select=True,
         logger.debug(f"Theory agnostic fiducial cut (out-of-acceptance): not ({selection})")
     return df
 
-def define_helicity_weights(df):
+def define_helicity_weights(df, filename=None):
     # define the helicity tensor, here nominal_weight will only have theory weights, no experimental pieces, it is defined in theory_tools.define_theory_weights_and_corrs
-    weightsByHelicity_helper = wremnants.makehelicityWeightHelper()
+    weightsByHelicity_helper = wremnants.makehelicityWeightHelper(filename)
     df = df.Define("helWeight_tensor", weightsByHelicity_helper, ["massVgen", "absYVgen", "ptVgen", "chargeVgen", "csSineCosThetaPhigen", "nominal_weight"])
     df = df.Define("nominal_weight_helicity", "wrem::scalarmultiplyHelWeightTensor(nominal_weight, helWeight_tensor)")
     return df
diff --git a/wremnants/unfolding_tools.py b/wremnants/unfolding_tools.py
index 0067f24e6..12d025cf9 100644
--- a/wremnants/unfolding_tools.py
+++ b/wremnants/unfolding_tools.py
@@ -1,4 +1,4 @@
-from utilities import differential,common
-from wremnants import syst_tools, theory_tools, logging
+from utilities import differential, common, logging
+from wremnants import syst_tools, theory_tools, theory_corrections, theoryAgnostic_tools
 from copy import deepcopy
 import hist
@@ -137,12 +138,12 @@ def select_fiducial_space(df, select=True, accept=True, mode="w_mass", **kwargs)
     if select and accept:
         df =
df.Filter("acceptance") - elif select : + elif select: df = df.Filter("acceptance == 0") return df -def add_xnorm_histograms(results, df, args, dataset_name, corr_helpers, qcdScaleByHelicity_helper, unfolding_axes, unfolding_cols): +def add_xnorm_histograms(results, df, args, dataset_name, corr_helpers, qcdScaleByHelicity_helper, unfolding_axes, unfolding_cols, add_helicity_axis=False): # add histograms before any selection df_xnorm = df df_xnorm = df_xnorm.DefinePerSample("exp_weight", "1.0") @@ -156,7 +157,63 @@ def add_xnorm_histograms(results, df, args, dataset_name, corr_helpers, qcdScale xnorm_axes = [axis_xnorm, *unfolding_axes] xnorm_cols = ["xnorm", *unfolding_cols] - results.append(df_xnorm.HistoBoost("xnorm", xnorm_axes, [*xnorm_cols, "nominal_weight"])) + if add_helicity_axis: + df_xnorm = theoryAgnostic_tools.define_helicity_weights(df_xnorm, filename=f"{common.data_dir}/angularCoefficients/w_z_moments_unfoldingBinning.hdf5") - syst_tools.add_theory_hists(results, df_xnorm, args, dataset_name, corr_helpers, qcdScaleByHelicity_helper, xnorm_axes, xnorm_cols, base_name="xnorm") + from wremnants.helicity_utils import axis_helicity_multidim + results.append(df_xnorm.HistoBoost("xnorm", xnorm_axes, [*xnorm_cols, "nominal_weight_helicity"], tensor_axes=[axis_helicity_multidim])) + else: + results.append(df_xnorm.HistoBoost("xnorm", xnorm_axes, [*xnorm_cols, "nominal_weight"])) + + syst_tools.add_theory_hists( + results, + df_xnorm, + args, + dataset_name, + corr_helpers, + qcdScaleByHelicity_helper, + xnorm_axes, + xnorm_cols, + base_name="xnorm", + addhelicity=add_helicity_axis, + nhelicity=9, + ) + +def reweight_to_fitresult(fitresult, axes, poi_type = "nois", cme = 13, process = "Z", expected = False, flow=True): + # requires fitresult generated from 'fitresult_pois_to_hist.py' + histname = "hist_" + "_".join([a.name for a in axes]) + if expected: + histname += "_expected" + + import pickle + with open(fitresult, "rb") as f: + r = pickle.load(f) + if process == "W": + corrh_0 = r["results"][poi_type][f"chan_{str(cme).replace('.','p')}TeV"]["W_qGen0"][histname] + corrh_1 = r["results"][poi_type][f"chan_{str(cme).replace('.','p')}TeV"]["W_qGen1"][histname] + else: + corrh = r["results"][poi_type][f"chan_{str(cme).replace('.','p')}TeV"][process][histname] + + slices = [slice(None) for i in range(len(axes))] + + if "qGen" not in [a.name for a in axes]: + # CorrectionsTensor needs charge axis + if process == "Z": + axes.append(hist.axis.Regular(1, -1, 1, name="chargeVGen", flow=False)) + slices.append(np.newaxis) + values = corrh.values(flow=flow) + elif process == "W": + axes.append(hist.axis.Regular(2, -2, 2, name="chargeVGen", flow=False)) + slices.append(slice(None)) + values = np.stack([corrh_0.values(flow=flow), corrh_1.values(flow=flow)], axis=-1) + + ch = hist.Hist(*axes, hist.axis.Regular(1, 0, 1, name="vars", flow=False)) + slices.append(np.newaxis) + + ch = theory_corrections.set_corr_ratio_flow(ch) + ch.values(flow=flow)[...] = values[*slices] + + logger.debug(f"corrections from fitresult: {values}") + from wremnants.correctionsTensor_helper import makeCorrectionsTensor + return makeCorrectionsTensor(ch)
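-- 
The flow-bin handling added to conversion_tools.py (expand_flow together with the
shape = [a.extent if a.name in flow_axes else a.size for a in axes] reshaping) can
be exercised standalone. A minimal sketch of the idea; the axis names and binning
below are hypothetical, not taken from the patch:

import hist
import numpy as np

# ptVGen's overflow was part of the unfolding (listed in flow_axes);
# absYVGen has an overflow slot in the histogram but not in the fit
axis_pt = hist.axis.Regular(3, 0., 30., name="ptVGen", underflow=False, overflow=True)
axis_y = hist.axis.Regular(2, 0., 2.5, name="absYVGen", underflow=False, overflow=True)
flow_axes = ["ptVGen"]

# values as read from the fit: (3 pt bins + overflow) x 2 |y| bins
val = np.arange(4 * 2, dtype=float).reshape(4, 2)

# pad absYVGen's overflow slot with ones, as expand_flow does for axes
# that carry flow in the histogram but were not unfolded with flow
val = np.append(val, np.ones((4, 1)), axis=1)  # shape (4, 3)

# the padded array now matches the histogram's full flow view
h = hist.Hist(axis_pt, axis_y, storage=hist.storage.Weight())
h.view(flow=True)[...] = np.stack([val, val], axis=-1)  # (value, variance)
print(h.values(flow=True).shape)  # (4, 3)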
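Similarly, reweight_to_fitresult in unfolding_tools.py expects a pickle written by
scripts/utilities/fitresult_pois_to_hist.py, keyed as
r["results"][poi_type][channel][process][hist_name]. A hedged usage sketch; the
file name, axis choice, and binning are placeholders:

import hist
from wremnants import unfolding_tools

# placeholder gen axis; it must match the binning of the stored
# hist_ptVGen histogram in the fitresult pickle
axes = [hist.axis.Regular(3, 0., 30., name="ptVGen", underflow=False, overflow=False)]

# looks up r["results"]["nois"]["chan_13TeV"]["Z"]["hist_ptVGen"], appends a
# trivial chargeVGen axis, and returns a corrections tensor helper
helper = unfolding_tools.reweight_to_fitresult(
    "fitresult_pois.pkl", axes, poi_type="nois", cme=13, process="Z",
)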