From 8856b02053e3a10f27dc2e43aaf595653a53bc36 Mon Sep 17 00:00:00 2001 From: David Date: Thu, 15 Aug 2024 21:42:22 +0200 Subject: [PATCH] Transform x for power polynomials to [-1,1]; change fake uncertainties; disable swapping bins in case abcd-x axis is not integrated (for mt plotting in CI) --- scripts/combine/setupCombine.py | 32 +++++++------------------------- wremnants/histselections.py | 17 +++++++++++------ wremnants/regression.py | 21 ++++++++++++++------- 3 files changed, 32 insertions(+), 38 deletions(-) diff --git a/scripts/combine/setupCombine.py b/scripts/combine/setupCombine.py index 3dd478015..2c883f276 100644 --- a/scripts/combine/setupCombine.py +++ b/scripts/combine/setupCombine.py @@ -130,7 +130,7 @@ def make_parser(parser=None): parser.add_argument("--pseudoDataIdxs", type=str, nargs="+", default=[None], help="Variation indices to use as pseudodata for each of the histograms") parser.add_argument("--pseudoDataFile", type=str, help="Input file for pseudodata (if it should be read from a different file)", default=None) parser.add_argument("--pseudoDataProcsRegexp", type=str, default=".*", help="Regular expression for processes taken from pseudodata file (all other processes are automatically got from the nominal file). 
Data is excluded automatically as usual") - parser.add_argument("--pseudoDataFakes", type=str, nargs="+", choices=["truthMC", "closure", "simple", "extrapolate", "extended1D", "extended2D", "dataClosure", "mcClosure", "simple-binned", "extended1D-fakerate"], + parser.add_argument("--pseudoDataFakes", type=str, nargs="+", choices=["truthMC", "closure", "simple", "extrapolate", "extended1D", "extended2D", "dataClosure", "mcClosure", "simple-binned", "extended1D-binned", "extended1D-fakerate"], help="Pseudodata for fakes are using QCD MC (closure), or different estimation methods (simple, extended1D, extended2D)") parser.add_argument("--addTauToSignal", action='store_true', help="Events from the same process but from tau final states are added to the signal") parser.add_argument("--noPDFandQCDtheorySystOnSignal", action='store_true', help="Removes PDF and theory uncertainties on signal processes") @@ -764,30 +764,12 @@ def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]): action=fakeselector.get_hist, systAxes=[f"_{x}" for x in syst_axes if x in args.fakerateAxes]+["_param", "downUpVar"]) if args.fakeSmoothingMode in ["hybrid", "full"]: - if False: - # these uncertainties are covered by the binByBin stat uncertainties - # for the moment - subgroup = f"{cardTool.getFakeName()}Smoothing" - cardTool.addSystematic(**info, - rename=subgroup, - splitGroup = {subgroup: f".*", "experiment": ".*"}, - systNamePrepend=subgroup, - actionArgs=dict(variations_smoothing=True), - ) - - subgroup = f"{cardTool.getFakeName()}SmoothingSyst" - cardTool.addSystematic(name=inputBaseName, - group="Fake", - processes=cardTool.getFakeName(), - noConstraint=False, - mirror=True, - scale=1., - applySelection=False, - action = fakeselector.get_smoothing_syst, - rename=subgroup, - splitGroup = {subgroup: f".*", "experiment": ".*"}, - systNamePrepend=subgroup, - systAxes = ["var"], + subgroup = f"{cardTool.getFakeName()}Smoothing" + cardTool.addSystematic(**info, + rename=subgroup, + 
splitGroup = {subgroup: f".*", "experiment": ".*"}, + systNamePrepend=subgroup, + actionArgs=dict(variations_smoothing=True), ) if args.fakeSmoothingMode in ["fakerate", "hybrid"]: diff --git a/wremnants/histselections.py b/wremnants/histselections.py index aab5e5b6d..ab4bbe29b 100644 --- a/wremnants/histselections.py +++ b/wremnants/histselections.py @@ -390,7 +390,9 @@ def get_hist(self, h, is_nominal=False, variations_frf=False, variations_smoothi h = self.transfer_variances(h, set_nominal=is_nominal) y_frf, y_frf_var = self.compute_fakeratefactor(h, smoothing=True, syst_variations=variations_frf) - if self.swap_regions: + if not self.integrate_x and self.swap_regions: + logger.warning(f"Regions for fakerate estimation can only be swapped if abcd-x axis is integrated") + if self.swap_regions and self.integrate_x: if type(self) == FakeSelectorSimpleABCD: # replace C region with B region hC = self.get_hist_failX_passY(h) @@ -426,9 +428,8 @@ def get_hist(self, h, is_nominal=False, variations_frf=False, variations_smoothi elif variations_frf: dvar = cval[..., np.newaxis,np.newaxis] * y_frf_var[...,:,:] elif self.smoothing_mode in ["hybrid"]: - # keep statistical uncertainty from c region since we'll propagate that - # as a systematic - dvar = y_frf**2 * cvar_binned + # no bin by bin statistical uncertainty, all regions covered by smoothing + dvar = np.zeros_like(cvar) else: # only take bin by bin uncertainty from c region dvar = y_frf**2 * cvar @@ -472,7 +473,9 @@ def compute_fakeratefactor(self, h, smoothing=False, syst_variations=False, flow # select sideband regions ha = self.get_hist_failX_failY(hNew) - if self.swap_regions: + if not self.integrate_x and self.swap_regions: + logger.warning(f"Regions for fakerate estimation can only be swapped if abcd-x axis is integrated") + if self.swap_regions and self.integrate_x: # replace B region with C region hb = self.get_hist_passX_failY(hNew) else: hb = self.get_hist_failX_passY(hNew) @@ -817,7 +820,9 @@ def compute_fakeratefactor(self, h, 
smoothing=False, syst_variations=False, flow hb = hNew[{self.name_x: self.sel_dx, self.name_y: self.sel_y}] hbx = hNew[{self.name_x: self.sel_d2x, self.name_y: self.sel_y}] - if self.swap_regions: + if not self.integrate_x and self.swap_regions: + logger.warning(f"Regions for fakerate estimation can only be swapped if abcd-x axis is integrated") + if self.swap_regions and self.integrate_x: # replace Ax region with C region hax = self.get_hist_passX_failY(hNew) else: diff --git a/wremnants/regression.py b/wremnants/regression.py index 9ac2d2f4e..c3c5c0cd9 100644 --- a/wremnants/regression.py +++ b/wremnants/regression.py @@ -20,11 +20,11 @@ def compute_chi2(y, y_pred, w=None, nparams=1): ndf = y.shape[-1] - nparams ndf_total = y.size - chi2.size*nparams - logger.info(f"Total chi2/ndf = {chi2_total}/{ndf_total} = {chi2_total/ndf_total} (p = {stats.chi2.sf(chi2_total, ndf_total)})") - logger.info(f"Min chi2/ndf = {chi2.min()}/{ndf} (p = {stats.chi2.sf(chi2.min(), ndf)})") - logger.info(f"Max chi2/ndf = {chi2.max()}/{ndf} (p = {stats.chi2.sf(chi2.max(), ndf)})") - logger.info(f"Mean chi2 = {chi2.mean()}") - logger.info(f"Std chi2 = {chi2.std()}") + logger.debug(f"Total chi2/ndf = {chi2_total}/{ndf_total} = {chi2_total/ndf_total} (p = {stats.chi2.sf(chi2_total, ndf_total)})") + logger.debug(f"Min chi2/ndf = {chi2.min()}/{ndf} (p = {stats.chi2.sf(chi2.min(), ndf)})") + logger.debug(f"Max chi2/ndf = {chi2.max()}/{ndf} (p = {stats.chi2.sf(chi2.max(), ndf)})") + logger.debug(f"Mean chi2 = {chi2.mean()}") + logger.debug(f"Std chi2 = {chi2.std()}") return chi2, ndf @@ -225,6 +225,8 @@ def transform_bernstein(x, min_x, max_x, cap_x=False): raise RuntimeError(f"All values need to be within [0,1] but {np.sum(x < 0)} values smaller 0 ({x[x < 0]}) and {np.sum(x > 1)} larger 1 ({x[x > 1]}) found after transformation with xmin={xmin} and xmax={xmax}") return x +def transform_power(x, *args, **kwargs): + return transform_bernstein(x, *args, **kwargs) * 2 - 1 class Regressor(object): 
def __init__( @@ -259,8 +261,10 @@ def __init__( self.external_cov = None def transform_x(self, x): - # if self.polynomial in ["bernstein", "monotonic"]: - x = transform_bernstein(x, self.min_x, self.max_x, self.cap_x) + if self.polynomial in ["bernstein", "monotonic"]: + x = transform_bernstein(x, self.min_x, self.max_x, self.cap_x) + else: + x = transform_power(x, self.min_x, self.max_x, self.cap_x) return x def solve(self, x, y, w, chi2_info=True): @@ -358,6 +362,9 @@ def transform_x(self, x1, x2): if self.polynomial in ["bernstein", "monotonic"]: x1 = transform_bernstein(x1, self.min_x[0], self.max_x[0], self.cap_x[0]) x2 = transform_bernstein(x2, self.min_x[1], self.max_x[1], self.cap_x[1]) + else: + x1 = transform_power(x1, self.min_x[0], self.max_x[0], self.cap_x[0]) + x2 = transform_power(x2, self.min_x[1], self.max_x[1], self.cap_x[1]) return x1, x2 def solve(self, x1, x2, y, w, flatten=False):