Merge pull request #493 from davidwalter2/240709_updUnfolding

Updates on unfolding

davidwalter2 authored Jul 12, 2024
2 parents bb40c91 + 0f64787 commit 5495a7b
Showing 20 changed files with 1,666 additions and 683 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/unfolding.yml
@@ -422,14 +422,14 @@ jobs:
# run with a reduced binning
if: github.event_name == 'pull_request' || github.event.schedule == '30 5 * * 1-5'
run: >-
- scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH
+ scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH --excludeFlow
--analysisMode unfolding -j $((NTHREADS)) --maxFiles $((MAX_FILES)) --forceDefaultName -o $WREMNANTS_OUTDIR --axes ptll yll --postfix unfolding --genAxes ptVGen
- name: dilepton analysis
# run with full binning
if: github.event_name != 'pull_request' && github.event.schedule != '30 5 * * 1-5'
run: >-
- scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH
+ scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH --excludeFlow
--analysisMode unfolding -j $((NTHREADS)) --maxFiles $((MAX_FILES)) --forceDefaultName -o $WREMNANTS_OUTDIR --axes ptll yll --postfix unfolding
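The only functional change in this workflow is the extra --excludeFlow flag passed to mz_dilepton.py in both CI variants. As a rough sketch of what such a flag usually controls (an assumption about the histmaker internals, not code from this repository), it maps to booking histogram axes without under/overflow bins:

    # Minimal sketch, assuming --excludeFlow disables under/overflow bins;
    # the axis name and range are invented for illustration.
    import argparse
    import hist

    parser = argparse.ArgumentParser()
    parser.add_argument("--excludeFlow", action="store_true",
                        help="create axes without under/overflow bins")
    args = parser.parse_args(["--excludeFlow"])

    flow = not args.excludeFlow
    axis_ptll = hist.axis.Regular(20, 0., 100., name="ptll",
                                  underflow=flow, overflow=flow)
    h = hist.Hist(axis_ptll)  # events outside [0, 100) are now dropped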
101 changes: 60 additions & 41 deletions scripts/combine/pullsAndImpacts.py
@@ -10,14 +10,17 @@
from dash import html
from dash.dependencies import Input, Output
from utilities import logging
- from utilities.io_tools import input_tools, output_tools, combinetf_input
+ from utilities.io_tools import input_tools, output_tools, combinetf_input, conversion_tools
from wremnants import plot_tools
import os
import re
import json
+ from narf import ioutils

from utilities.styles.styles import nuisance_groupings as groupings

logger = logging.child_logger(__name__)

def writeOutput(fig, outfile, extensions=[], postfix=None, args=None, meta_info=None):
name, ext = os.path.splitext(outfile)
if ext not in extensions:
@@ -87,11 +90,11 @@ def plotImpacts(df, impact_title="", pulls=False, normalize=False, oneSidedImpac

if impacts and include_ref:
# append numerical values of impacts on nuisance name; fill up empty room with spaces to align numbers
- frmt = "{:0"+str(int(np.log10(max(df[impact_str])))+2)+".2f}"
+ frmt = "{:0"+str(int(np.log10(max(df[impact_str])) if max(df[f"{impact_str}_ref"])>0 else 0)+2)+".2f}"
nval = df[impact_str].apply(lambda x,frmt=frmt: frmt.format(x)) #.astype(str)
nspace = nval.apply(lambda x, n=nval.apply(len).max(): " "*(n - len(x)))
if include_ref:
- frmt_ref = "{:0"+str(int(np.log10(max(df[f"{impact_str}_ref"])))+2)+".2f}"
+ frmt_ref = "{:0"+str(int(np.log10(max(df[f"{impact_str}_ref"])) if max(df[f"{impact_str}_ref"])>0 else 0)+2)+".2f}"
nval_ref = df[f'{impact_str}_ref'].apply(lambda x,frmt=frmt_ref: " ("+frmt.format(x)+")") #.round(2).astype(str)
nspace_ref = nval_ref.apply(lambda x, n=nval_ref.apply(len).max(): " "*(n - len(x)))
nval = nval+nspace_ref+nval_ref
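Both format strings now guard against a non-positive maximum before taking the logarithm. A self-contained sketch of the failure mode the guard avoids (make_format is a hypothetical helper, not a function from this script):

    import numpy as np

    # If all impacts are zero, np.log10(max(...)) returns -inf and
    # int(-inf) raises OverflowError; the guard falls back to width 2.
    def make_format(values):
        max_v = max(values)
        digits = int(np.log10(max_v)) if max_v > 0 else 0
        return "{:0" + str(digits + 2) + ".2f}"

    print(make_format([0.37, 12.5]).format(12.5))  # '12.50'
    print(make_format([0.0, 0.0]).format(0.0))     # '0.00' instead of a crash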
@@ -256,16 +259,17 @@ def plotImpacts(df, impact_title="", pulls=False, normalize=False, oneSidedImpac

return fig

- def readFitInfoFromFile(rf, filename, poi, group=False, stat=0.0, normalize=False, scale=1):
- impacts, labels, _ = combinetf_input.read_impacts_poi(rf, group, add_total=group, stat=stat, poi=poi, normalize=normalize)
-
- if (group and grouping) or args.filters:
+ def readFitInfoFromFile(rf, filename, poi, group=False, grouping=None, filters=[], stat=0.0, normalize=False, scale=1):
+ logger.debug("Read impacts for poi from file")
+ impacts, labels, norm = combinetf_input.read_impacts_poi(rf, group, add_total=group, stat=stat, poi=poi, normalize=normalize)
+
+ if (group and grouping) or filters:
filtimpacts = []
filtlabels = []
for impact,label in zip(impacts,labels):
if group and grouping and label not in grouping:
continue
- if args.filters and not any(re.match(f, label) for f in args.filters):
+ if filters and not any(re.match(f, label) for f in filters):
continue
filtimpacts.append(impact)
filtlabels.append(label)
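With this refactor, grouping and filters arrive as function arguments instead of being read from globals, so each call can use its own configuration. A minimal standalone sketch of the filter semantics, with invented labels and patterns:

    import re

    # Keep a nuisance label only if it matches at least one filter regex.
    filters = ["^pdf.*", "muon_.*"]
    labels = ["pdf1", "muon_eff_stat", "lumi"]
    kept = [l for l in labels if any(re.match(f, l) for f in filters)]
    print(kept)  # ['pdf1', 'muon_eff_stat']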
@@ -293,8 +297,9 @@ def readFitInfoFromFile(rf, filename, poi, group=False, stat=0.0, normalize=Fals
def parseArgs():
sort_choices = ["label", "abspull", "constraint", "absimpact"]
sort_choices += [
*[f"{c}_diff" for c in sort_choices], # possibility to sort based on largest difference between inputfile and referencefile
*[f"{c}_ref" for c in sort_choices] ] # possibility to sort based on referencefile
*[f"{c}_diff" for c in sort_choices], # possibility to sort based on largest difference between input and referencefile
*[f"{c}_ref" for c in sort_choices], # possibility to sort based on reference file
*[f"{c}_both" for c in sort_choices] ] # possibility to sort based on the largest/smallest of both input and reference file

parser = argparse.ArgumentParser()
parser.add_argument("-f", "--inputFile", type=str, required=True, help="fitresults output ROOT/hdf5 file from combinetf")
@@ -311,6 +316,8 @@ def parseArgs():
parser.add_argument("--grouping", type=str, default=None, help="Select nuisances by a predefined grouping", choices=groupings.keys())
parser.add_argument("-t","--translate", type=str, default=None, help="Specify .json file to translate labels")
parser.add_argument("--noImpacts", action='store_true', help="Don't show impacts")
parser.add_argument("--poi", type=str, default=None, help="Specify POI to make impacts for, otherwise use all")
parser.add_argument("--poiType", type=str, default=None, help="POI type to make impacts for")
parsers = parser.add_subparsers(dest='output_mode')
interactive = parsers.add_parser("interactive", help="Launch and interactive dash session")
interactive.add_argument("-i", "--interface", default="localhost", help="The network interface to bind to.")
@@ -335,7 +342,7 @@ def parseArgs():
[Input("groups", "on")],
)

- def producePlots(fitresult, args, poi, group=False, normalize=False, fitresult_ref=None):
+ def producePlots(fitresult, args, poi, group=False, normalize=False, fitresult_ref=None, grouping=None):
poi_type = poi.split("_")[-1] if poi else None

if poi is not None and "MeV" in poi:
@@ -354,53 +361,58 @@ def producePlots(fitresult, args, poi, group=False, normalize=False, fitresult_r
impact_title = "$\\mathrm{Impact\\ on\\ mass\\ diff. }(\\eta)\\ (\\mathrm{MeV})$"
else:
impact_title = "Impact on mass diff. (MeV)"
elif poi and poi.startswith("Width"):
elif poi and poi.startswith("width"):
impact_title = "Impact on width (MeV)"
elif poi_type in ["pmaskedexp", "pmaskedexpnorm", "sumxsec", "sumxsecnorm"]:
if poi_type in ["pmaskedexp", "sumxsec"]:
meta = ioutils.pickle_load_h5py(fitresult["meta"])
channel_info = conversion_tools.combine_channels(meta, True)
if len(channel_info.keys()) == 1:
lumi = channel_info["chan_13TeV"]["lumi"]
else:
raise NotImplementedError(f"Found channels {[k for k in channel_info.keys()]} but only one channel is supported.")
scale = 1./(lumi*1000)
poi_name = "_".join(poi.split("_")[:-1])
impact_title = "$\\sigma_\\mathrm{fid}("+poi_name+") [\\mathrm{pb}]$"
else:
impact_title = "$1/\\sigma_\\mathrm{fid} \\mathrm{d}\\sigma$"
elif poi_type in ["ratiometaratio"]:
poi_name = "_".join(poi.split("_")[:-1]).replace("r_","")
impact_title = f"Impact on ratio {poi_name} *1000"
scale=1000
else:
impact_title=poi

if not (group and args.output_mode == 'output'):
df = readFitInfoFromFile(fitresult, args.inputFile, poi, False, stat=args.stat/100., normalize=normalize, scale=scale)
elif group:
- df = readFitInfoFromFile(fitresult, args.inputFile, poi, True, stat=args.stat/100., normalize=normalize, scale=scale)
+ df = readFitInfoFromFile(fitresult, args.inputFile, poi, True, stat=args.stat/100., normalize=normalize, scale=scale, grouping=grouping)

if fitresult_ref:
- df_ref = readFitInfoFromFile(fitresult_ref, args.referenceFile, poi, group, stat=args.stat/100., normalize=normalize, scale=scale)
- df = df.merge(df_ref, how="left", on="label", suffixes=("","_ref"))
+ df_ref = readFitInfoFromFile(fitresult_ref, args.referenceFile, poi, group, stat=args.stat/100., normalize=normalize, scale=scale, grouping=grouping)
+ df = df.merge(df_ref, how="outer", on="label", suffixes=("","_ref"))

if group and fitresult_ref and set(fitresult_ref["hsysts"]) != set(fitresult["hsysts"]):
# add another group for the uncertainty from all systematics that are not common among the two groups;
# defined as err = sqrt( variance(total) - variance(common nuisances) )
np_common = [s in fitresult_ref["hsysts"][...] for s in fitresult["hsysts"][...]]
np_common_ref = [s in fitresult["hsysts"][...] for s in fitresult_ref["hsysts"][...]]
cov = fitresult["cov"][np_common, :][:, np_common]
cov_ref = fitresult_ref["cov"][np_common_ref, :][:, np_common_ref]

jac = fitresult["nuisance_impact_nois"][:,np_common]
jac_ref = fitresult_ref["nuisance_impact_nois"][:,np_common_ref]

df_total = df.loc[df["label"] == "Total"]
impact_others = np.sqrt(df_total["impact"].values[0]**2 - scale**2 * (jac @ (cov @ jac.T))[0][0])
impact_others_ref = np.sqrt(df_total["impact_ref"].values[0]**2 - scale**2 * (jac_ref @ (cov_ref @ jac_ref.T))[0][0])

new_row = {"label": "Others",
"impact": impact_others, "absimpact": abs(impact_others), "impact_color": df_total["impact_color"].values[0],
"impact_ref": impact_others_ref, "absimpact_ref": abs(impact_others_ref), "impact_color_ref": df_total["impact_color"].values[0],
}

df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
# remove rows with nuisance groups that are only included in one group but not in the other, since those are included in "Others" already
df = df.dropna()
# Set default values for missing entries in respective columns
default_values = {'impact_color': "#377eb8", 'impact_color_ref': "#377eb8"}
for col in df.columns:
df[col].fillna(default_values.get(col, 0), inplace=True)
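
The new "Others" entry quotes the uncertainty that the two fits do not have in common, err = sqrt(variance(total) - variance(common nuisances)), where the common part is propagated as jac @ cov @ jac.T. A numerical sketch with invented matrices:

    import numpy as np

    cov = np.diag([0.4, 0.2, 0.1])     # covariance of the common nuisances
    jac = np.array([[0.5, 0.3, 0.2]])  # impact of each common nuisance on the POI
    var_common = (jac @ (cov @ jac.T))[0][0]   # 0.122

    total_impact = 0.6                 # total uncertainty from the fit
    impact_others = np.sqrt(total_impact**2 - var_common)
    print(round(impact_others, 3))     # 0.488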

if args.sort:
if args.sort.endswith("diff"):
logger.debug("Sort impacts")
key = args.sort.replace("_diff","")
df[f"{key}_diff"] = df[key] - df[f"{key}_ref"]
elif args.sort.endswith("both"):
key = args.sort.replace("_both","")
if args.ascending:
df[f"{key}_both"] = df[[key, f"{key}_ref"]].max(axis=1)
else:
df[f"{key}_both"] = df[[key, f"{key}_ref"]].min(axis=1)

df = df.sort_values(by=args.sort, ascending=args.ascending)

df = df.fillna(0)

logger.debug("Make plots")
if args.output_mode == "interactive":
app.layout = html.Div([
dcc.Input(
@@ -492,9 +504,16 @@ def producePlots(fitresult, args, poi, group=False, normalize=False, fitresult_r
producePlots(fitresult, args, None, fitresult_ref=fitresult_ref)
exit()

- pois = combinetf_input.get_poi_names(fitresult, poi_type=None)
+ if args.poi:
+ pois = [args.poi]
+ else:
+ pois = combinetf_input.get_poi_names(fitresult, poi_type=args.poiType)

for poi in pois:
logger.debug(f"Now at {poi}")
if args.mode in ["both", "ungrouped"]:
logger.debug(f"Make impact per nuisance")
producePlots(fitresult, args, poi, fitresult_ref=fitresult_ref)
if args.mode in ["both", "group"]:
- producePlots(fitresult, args, poi, group=True, fitresult_ref=fitresult_ref)
+ logger.debug(f"Make impact my group")
+ producePlots(fitresult, args, poi, group=True, fitresult_ref=fitresult_ref, grouping=grouping)
