Skip to content

Commit

Permalink
Transform x for power polynomials to [-1,1]; change fake uncertaintie…
Browse files Browse the repository at this point in the history
…s; disable swapping bins in case abcd-x axis is not integrated (for mt plotting in CI)
  • Loading branch information
davidwalter2 committed Aug 15, 2024
1 parent 199f3ea commit 8856b02
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 38 deletions.
32 changes: 7 additions & 25 deletions scripts/combine/setupCombine.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def make_parser(parser=None):
parser.add_argument("--pseudoDataIdxs", type=str, nargs="+", default=[None], help="Variation indices to use as pseudodata for each of the histograms")
parser.add_argument("--pseudoDataFile", type=str, help="Input file for pseudodata (if it should be read from a different file)", default=None)
parser.add_argument("--pseudoDataProcsRegexp", type=str, default=".*", help="Regular expression for processes taken from pseudodata file (all other processes are automatically got from the nominal file). Data is excluded automatically as usual")
parser.add_argument("--pseudoDataFakes", type=str, nargs="+", choices=["truthMC", "closure", "simple", "extrapolate", "extended1D", "extended2D", "dataClosure", "mcClosure", "simple-binned", "extended1D-fakerate"],
parser.add_argument("--pseudoDataFakes", type=str, nargs="+", choices=["truthMC", "closure", "simple", "extrapolate", "extended1D", "extended2D", "dataClosure", "mcClosure", "simple-binned", "extended1D-binned", "extended1D-fakerate"],
help="Pseudodata for fakes are using QCD MC (closure), or different estimation methods (simple, extended1D, extended2D)")
parser.add_argument("--addTauToSignal", action='store_true', help="Events from the same process but from tau final states are added to the signal")
parser.add_argument("--noPDFandQCDtheorySystOnSignal", action='store_true', help="Removes PDF and theory uncertainties on signal processes")
Expand Down Expand Up @@ -764,30 +764,12 @@ def assertSample(name, startsWith=["W", "Z"], excludeMatch=[]):
action=fakeselector.get_hist,
systAxes=[f"_{x}" for x in syst_axes if x in args.fakerateAxes]+["_param", "downUpVar"])
if args.fakeSmoothingMode in ["hybrid", "full"]:
if False:
# these uncertainties are covered by the binByBin stat uncertainties
# for the moment
subgroup = f"{cardTool.getFakeName()}Smoothing"
cardTool.addSystematic(**info,
rename=subgroup,
splitGroup = {subgroup: f".*", "experiment": ".*"},
systNamePrepend=subgroup,
actionArgs=dict(variations_smoothing=True),
)

subgroup = f"{cardTool.getFakeName()}SmoothingSyst"
cardTool.addSystematic(name=inputBaseName,
group="Fake",
processes=cardTool.getFakeName(),
noConstraint=False,
mirror=True,
scale=1.,
applySelection=False,
action = fakeselector.get_smoothing_syst,
rename=subgroup,
splitGroup = {subgroup: f".*", "experiment": ".*"},
systNamePrepend=subgroup,
systAxes = ["var"],
subgroup = f"{cardTool.getFakeName()}Smoothing"
cardTool.addSystematic(**info,
rename=subgroup,
splitGroup = {subgroup: f".*", "experiment": ".*"},
systNamePrepend=subgroup,
actionArgs=dict(variations_smoothing=True),
)

if args.fakeSmoothingMode in ["fakerate", "hybrid"]:
Expand Down
17 changes: 11 additions & 6 deletions wremnants/histselections.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,9 @@ def get_hist(self, h, is_nominal=False, variations_frf=False, variations_smoothi
h = self.transfer_variances(h, set_nominal=is_nominal)
y_frf, y_frf_var = self.compute_fakeratefactor(h, smoothing=True, syst_variations=variations_frf)

if self.swap_regions:
if not self.integrate_x and self.swap_regions:
logger.warning(f"Regions for fakerate estiamation can only be swapped if abcd-x axis is integrated")
if self.swap_regions and self.integrate_x:
if type(self) == FakeSelectorSimpleABCD:
# replace C region with B region
hC = self.get_hist_failX_passY(h)
Expand Down Expand Up @@ -426,9 +428,8 @@ def get_hist(self, h, is_nominal=False, variations_frf=False, variations_smoothi
elif variations_frf:
dvar = cval[..., np.newaxis,np.newaxis] * y_frf_var[...,:,:]
elif self.smoothing_mode in ["hybrid"]:
# keep statistical uncertainty from c region since we'll propagate that
# as a systematic
dvar = y_frf**2 * cvar_binned
# no bin-by-bin statistical uncertainty, all regions covered by smoothing
dvar = np.zeros_like(cvar)
else:
# only take bin by bin uncertainty from c region
dvar = y_frf**2 * cvar
Expand Down Expand Up @@ -472,7 +473,9 @@ def compute_fakeratefactor(self, h, smoothing=False, syst_variations=False, flow

# select sideband regions
ha = self.get_hist_failX_failY(hNew)
if self.swap_regions:
if not self.integrate_x and self.swap_regions:
logger.warning(f"Regions for fakerate estiamation can only be swapped if abcd-x axis is integrated")
if self.swap_regions and self.integrate_x:
# replace B region with C region
hb = self.get_hist_passX_failY(hNew)
else:
Expand Down Expand Up @@ -817,7 +820,9 @@ def compute_fakeratefactor(self, h, smoothing=False, syst_variations=False, flow
hb = hNew[{self.name_x: self.sel_dx, self.name_y: self.sel_y}]
hbx = hNew[{self.name_x: self.sel_d2x, self.name_y: self.sel_y}]

if self.swap_regions:
if not self.integrate_x and self.swap_regions:
logger.warning(f"Regions for fakerate estiamation can only be swapped if abcd-x axis is integrated")
if self.swap_regions and self.integrate_x:
# replace Ax region with C region
hax = self.get_hist_passX_failY(hNew)
else:
Expand Down
21 changes: 14 additions & 7 deletions wremnants/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ def compute_chi2(y, y_pred, w=None, nparams=1):
ndf = y.shape[-1] - nparams
ndf_total = y.size - chi2.size*nparams

logger.info(f"Total chi2/ndf = {chi2_total}/{ndf_total} = {chi2_total/ndf_total} (p = {stats.chi2.sf(chi2_total, ndf_total)})")
logger.info(f"Min chi2/ndf = {chi2.min()}/{ndf} (p = {stats.chi2.sf(chi2.min(), ndf)})")
logger.info(f"Max chi2/ndf = {chi2.max()}/{ndf} (p = {stats.chi2.sf(chi2.max(), ndf)})")
logger.info(f"Mean chi2 = {chi2.mean()}")
logger.info(f"Std chi2 = {chi2.std()}")
logger.debug(f"Total chi2/ndf = {chi2_total}/{ndf_total} = {chi2_total/ndf_total} (p = {stats.chi2.sf(chi2_total, ndf_total)})")
logger.debug(f"Min chi2/ndf = {chi2.min()}/{ndf} (p = {stats.chi2.sf(chi2.min(), ndf)})")
logger.debug(f"Max chi2/ndf = {chi2.max()}/{ndf} (p = {stats.chi2.sf(chi2.max(), ndf)})")
logger.debug(f"Mean chi2 = {chi2.mean()}")
logger.debug(f"Std chi2 = {chi2.std()}")
return chi2, ndf


Expand Down Expand Up @@ -225,6 +225,8 @@ def transform_bernstein(x, min_x, max_x, cap_x=False):
raise RuntimeError(f"All values need to be within [0,1] but {np.sum(x < 0)} values smaller 0 ({x[x < 0]}) and {np.sum(x > 1)} larger 1 ({x[x > 1]}) found after transformation with xmin={xmin} and xmax={xmax}")
return x

def transform_power(x, *args, **kwargs):
    """Rescale x to [-1, 1], the conventional domain for power-series polynomials.

    Reuses the Bernstein transform (which maps onto [0, 1]) and linearly
    stretches the result to [-1, 1]. All arguments are forwarded unchanged.
    """
    unit_interval = transform_bernstein(x, *args, **kwargs)
    return unit_interval * 2 - 1

class Regressor(object):
def __init__(
Expand Down Expand Up @@ -259,8 +261,10 @@ def __init__(
self.external_cov = None

def transform_x(self, x):
    """Map x onto the domain of the configured polynomial basis.

    Bernstein/monotonic bases are defined on [0, 1] (transform_bernstein
    raises if values fall outside after rescaling); any other basis (power
    series) is evaluated on [-1, 1] via transform_power.
    """
    # Exactly one transform must run: applying transform_bernstein
    # unconditionally and then re-transforming in the branch below would
    # rescale x twice and corrupt the domain.
    if self.polynomial in ["bernstein", "monotonic"]:
        x = transform_bernstein(x, self.min_x, self.max_x, self.cap_x)
    else:
        x = transform_power(x, self.min_x, self.max_x, self.cap_x)
    return x

def solve(self, x, y, w, chi2_info=True):
Expand Down Expand Up @@ -358,6 +362,9 @@ def transform_x(self, x1, x2):
if self.polynomial in ["bernstein", "monotonic"]:
x1 = transform_bernstein(x1, self.min_x[0], self.max_x[0], self.cap_x[0])
x2 = transform_bernstein(x2, self.min_x[1], self.max_x[1], self.cap_x[1])
else:
x1 = transform_power(x1, self.min_x[0], self.max_x[0], self.cap_x[0])
x2 = transform_power(x2, self.min_x[1], self.max_x[1], self.cap_x[1])
return x1, x2

def solve(self, x1, x2, y, w, flatten=False):
Expand Down

0 comments on commit 8856b02

Please sign in to comment.