Merge pull request #493 from davidwalter2/240709_updUnfolding

Updates on unfolding

davidwalter2 authored Jul 12, 2024
2 parents bb40c91 + 0f64787 commit 5495a7b
Showing 20 changed files with 1,666 additions and 683 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/unfolding.yml
@@ -422,14 +422,14 @@ jobs:
# run with a reduced binning
if: github.event_name == 'pull_request' || github.event.schedule == '30 5 * * 1-5'
run: >-
- scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH
+ scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH --excludeFlow
--analysisMode unfolding -j $((NTHREADS)) --maxFiles $((MAX_FILES)) --forceDefaultName -o $WREMNANTS_OUTDIR --axes ptll yll --postfix unfolding --genAxes ptVGen
- name: dilepton analysis
# run with full binning
if: github.event_name != 'pull_request' && github.event.schedule != '30 5 * * 1-5'
run: >-
- scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH
+ scripts/ci/run_with_singularity.sh scripts/ci/setup_and_run_python.sh scripts/histmakers/mz_dilepton.py --dataPath $DATAPATH --excludeFlow
--analysisMode unfolding -j $((NTHREADS)) --maxFiles $((MAX_FILES)) --forceDefaultName -o $WREMNANTS_OUTDIR --axes ptll yll --postfix unfolding
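The only functional change in this workflow is the extra --excludeFlow flag passed to mz_dilepton.py in both CI variants. As a rough sketch of what such a flag usually controls (an assumption about the histmaker internals, not code from this repository), it maps to booking histogram axes without under/overflow bins:

    # Minimal sketch, assuming --excludeFlow disables under/overflow bins;
    # the axis name and range are invented for illustration.
    import argparse
    import hist

    parser = argparse.ArgumentParser()
    parser.add_argument("--excludeFlow", action="store_true",
                        help="create axes without under/overflow bins")
    args = parser.parse_args(["--excludeFlow"])

    flow = not args.excludeFlow
    axis_ptll = hist.axis.Regular(20, 0., 100., name="ptll",
                                  underflow=flow, overflow=flow)
    h = hist.Hist(axis_ptll)  # events outside [0, 100) are now dropped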
101 changes: 60 additions & 41 deletions scripts/combine/pullsAndImpacts.py
@@ -10,14 +10,17 @@
from dash import html
from dash.dependencies import Input, Output
from utilities import logging
- from utilities.io_tools import input_tools, output_tools, combinetf_input
+ from utilities.io_tools import input_tools, output_tools, combinetf_input, conversion_tools
from wremnants import plot_tools
import os
import re
import json
+ from narf import ioutils

from utilities.styles.styles import nuisance_groupings as groupings

logger = logging.child_logger(__name__)

def writeOutput(fig, outfile, extensions=[], postfix=None, args=None, meta_info=None):
name, ext = os.path.splitext(outfile)
if ext not in extensions:
@@ -87,11 +90,11 @@ def plotImpacts(df, impact_title="", pulls=False, normalize=False, oneSidedImpac

if impacts and include_ref:
# append numerical values of impacts on nuisance name; fill up empty room with spaces to align numbers
- frmt = "{:0"+str(int(np.log10(max(df[impact_str])))+2)+".2f}"
+ frmt = "{:0"+str(int(np.log10(max(df[impact_str])) if max(df[f"{impact_str}_ref"])>0 else 0)+2)+".2f}"
nval = df[impact_str].apply(lambda x,frmt=frmt: frmt.format(x)) #.astype(str)
nspace = nval.apply(lambda x, n=nval.apply(len).max(): " "*(n - len(x)))
if include_ref:
- frmt_ref = "{:0"+str(int(np.log10(max(df[f"{impact_str}_ref"])))+2)+".2f}"
+ frmt_ref = "{:0"+str(int(np.log10(max(df[f"{impact_str}_ref"])) if max(df[f"{impact_str}_ref"])>0 else 0)+2)+".2f}"
nval_ref = df[f'{impact_str}_ref'].apply(lambda x,frmt=frmt_ref: " ("+frmt.format(x)+")") #.round(2).astype(str)
nspace_ref = nval_ref.apply(lambda x, n=nval_ref.apply(len).max(): " "*(n - len(x)))
nval = nval+nspace_ref+nval_ref
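Both format strings now guard against a non-positive maximum before taking the logarithm. A self-contained sketch of the failure mode the guard avoids (make_format is a hypothetical helper, not a function from this script):

    import numpy as np

    # If all impacts are zero, np.log10(max(...)) returns -inf and
    # int(-inf) raises OverflowError; the guard falls back to width 2.
    def make_format(values):
        max_v = max(values)
        digits = int(np.log10(max_v)) if max_v > 0 else 0
        return "{:0" + str(digits + 2) + ".2f}"

    print(make_format([0.37, 12.5]).format(12.5))  # '12.50'
    print(make_format([0.0, 0.0]).format(0.0))     # '0.00' instead of a crash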
@@ -256,16 +259,17 @@ def plotImpacts(df, impact_title="", pulls=False, normalize=False, oneSidedImpac

return fig

- def readFitInfoFromFile(rf, filename, poi, group=False, stat=0.0, normalize=False, scale=1):
- impacts, labels, _ = combinetf_input.read_impacts_poi(rf, group, add_total=group, stat=stat, poi=poi, normalize=normalize)
-
- if (group and grouping) or args.filters:
+ def readFitInfoFromFile(rf, filename, poi, group=False, grouping=None, filters=[], stat=0.0, normalize=False, scale=1):
+ logger.debug("Read impacts for poi from file")
+ impacts, labels, norm = combinetf_input.read_impacts_poi(rf, group, add_total=group, stat=stat, poi=poi, normalize=normalize)
+
+ if (group and grouping) or filters:
filtimpacts = []
filtlabels = []
for impact,label in zip(impacts,labels):
if group and grouping and label not in grouping:
continue
- if args.filters and not any(re.match(f, label) for f in args.filters):
+ if filters and not any(re.match(f, label) for f in filters):
continue
filtimpacts.append(impact)
filtlabels.append(label)
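With this refactor, grouping and filters arrive as function arguments instead of being read from globals, so each call can use its own configuration. A minimal standalone sketch of the filter semantics, with invented labels and patterns:

    import re

    # Keep a nuisance label only if it matches at least one filter regex.
    filters = ["^pdf.*", "muon_.*"]
    labels = ["pdf1", "muon_eff_stat", "lumi"]
    kept = [l for l in labels if any(re.match(f, l) for f in filters)]
    print(kept)  # ['pdf1', 'muon_eff_stat']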
@@ -293,8 +297,9 @@ def readFitInfoFromFile(rf, filename, poi, group=False, stat=0.0, normalize=Fals
def parseArgs():
sort_choices = ["label", "abspull", "constraint", "absimpact"]
sort_choices += [
*[f"{c}_diff" for c in sort_choices], # possibility to sort based on largest difference between inputfile and referencefile
*[f"{c}_ref" for c in sort_choices] ] # possibility to sort based on referencefile
*[f"{c}_diff" for c in sort_choices], # possibility to sort based on largest difference between input and referencefile
*[f"{c}_ref" for c in sort_choices], # possibility to sort based on reference file
*[f"{c}_both" for c in sort_choices] ] # possibility to sort based on the largest/smallest of both input and reference file

parser = argparse.ArgumentParser()
parser.add_argument("-f", "--inputFile", type=str, required=True, help="fitresults output ROOT/hdf5 file from combinetf")
@@ -311,6 +316,8 @@ def parseArgs():
parser.add_argument("--grouping", type=str, default=None, help="Select nuisances by a predefined grouping", choices=groupings.keys())
parser.add_argument("-t","--translate", type=str, default=None, help="Specify .json file to translate labels")
parser.add_argument("--noImpacts", action='store_true', help="Don't show impacts")
parser.add_argument("--poi", type=str, default=None, help="Specify POI to make impacts for, otherwise use all")
parser.add_argument("--poiType", type=str, default=None, help="POI type to make impacts for")
parsers = parser.add_subparsers(dest='output_mode')
interactive = parsers.add_parser("interactive", help="Launch and interactive dash session")
interactive.add_argument("-i", "--interface", default="localhost", help="The network interface to bind to.")
@@ -335,7 +342,7 @@ def parseArgs():
[Input("groups", "on")],
)

- def producePlots(fitresult, args, poi, group=False, normalize=False, fitresult_ref=None):
+ def producePlots(fitresult, args, poi, group=False, normalize=False, fitresult_ref=None, grouping=None):
poi_type = poi.split("_")[-1] if poi else None

if poi is not None and "MeV" in poi:
@@ -354,53 +361,58 @@ def producePlots(fitresult, args, poi, group=False, normalize=False, fitresult_r
impact_title = "$\\mathrm{Impact\\ on\\ mass\\ diff. }(\\eta)\\ (\\mathrm{MeV})$"
else:
impact_title = "Impact on mass diff. (MeV)"
elif poi and poi.startswith("Width"):
elif poi and poi.startswith("width"):
impact_title = "Impact on width (MeV)"
elif poi_type in ["pmaskedexp", "pmaskedexpnorm", "sumxsec", "sumxsecnorm"]:
if poi_type in ["pmaskedexp", "sumxsec"]:
meta = ioutils.pickle_load_h5py(fitresult["meta"])
channel_info = conversion_tools.combine_channels(meta, True)
if len(channel_info.keys()) == 1:
lumi = channel_info["chan_13TeV"]["lumi"]
else:
raise NotImplementedError(f"Found channels {[k for k in channel_info.keys()]} but only one channel is supported.")
scale = 1./(lumi*1000)
poi_name = "_".join(poi.split("_")[:-1])
impact_title = "$\\sigma_\\mathrm{fid}("+poi_name+") [\\mathrm{pb}]$"
else:
impact_title = "$1/\\sigma_\\mathrm{fid} \\mathrm{d}\\sigma$"
elif poi_type in ["ratiometaratio"]:
poi_name = "_".join(poi.split("_")[:-1]).replace("r_","")
impact_title = f"Impact on ratio {poi_name} *1000"
scale=1000
else:
impact_title=poi

if not (group and args.output_mode == 'output'):
df = readFitInfoFromFile(fitresult, args.inputFile, poi, False, stat=args.stat/100., normalize=normalize, scale=scale)
elif group:
- df = readFitInfoFromFile(fitresult, args.inputFile, poi, True, stat=args.stat/100., normalize=normalize, scale=scale)
+ df = readFitInfoFromFile(fitresult, args.inputFile, poi, True, stat=args.stat/100., normalize=normalize, scale=scale, grouping=grouping)

if fitresult_ref:
- df_ref = readFitInfoFromFile(fitresult_ref, args.referenceFile, poi, group, stat=args.stat/100., normalize=normalize, scale=scale)
- df = df.merge(df_ref, how="left", on="label", suffixes=("","_ref"))
+ df_ref = readFitInfoFromFile(fitresult_ref, args.referenceFile, poi, group, stat=args.stat/100., normalize=normalize, scale=scale, grouping=grouping)
+ df = df.merge(df_ref, how="outer", on="label", suffixes=("","_ref"))

if group and fitresult_ref and set(fitresult_ref["hsysts"]) != set(fitresult["hsysts"]):
# add another group for the uncertainty from all systematics that are not common among the two groups;
# defined as err = sqrt( variance(total) - variance(common nuisances) )
np_common = [s in fitresult_ref["hsysts"][...] for s in fitresult["hsysts"][...]]
np_common_ref = [s in fitresult["hsysts"][...] for s in fitresult_ref["hsysts"][...]]
cov = fitresult["cov"][np_common, :][:, np_common]
cov_ref = fitresult_ref["cov"][np_common_ref, :][:, np_common_ref]

jac = fitresult["nuisance_impact_nois"][:,np_common]
jac_ref = fitresult_ref["nuisance_impact_nois"][:,np_common_ref]

df_total = df.loc[df["label"] == "Total"]
impact_others = np.sqrt(df_total["impact"].values[0]**2 - scale**2 * (jac @ (cov @ jac.T))[0][0])
impact_others_ref = np.sqrt(df_total["impact_ref"].values[0]**2 - scale**2 * (jac_ref @ (cov_ref @ jac_ref.T))[0][0])

new_row = {"label": "Others",
"impact": impact_others, "absimpact": abs(impact_others), "impact_color": df_total["impact_color"].values[0],
"impact_ref": impact_others_ref, "absimpact_ref": abs(impact_others_ref), "impact_color_ref": df_total["impact_color"].values[0],
}

df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
# remove rows with nuisance groups that are only included in one group but not in the other, since those are included in "Others" already
df = df.dropna()
# Set default values for missing entries in respective columns
default_values = {'impact_color': "#377eb8", 'impact_color_ref': "#377eb8"}
for col in df.columns:
df[col].fillna(default_values.get(col, 0), inplace=True)
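
The new "Others" entry quotes the uncertainty that the two fits do not have in common, err = sqrt(variance(total) - variance(common nuisances)), where the common part is propagated as jac @ cov @ jac.T. A numerical sketch with invented matrices:

    import numpy as np

    cov = np.diag([0.4, 0.2, 0.1])     # covariance of the common nuisances
    jac = np.array([[0.5, 0.3, 0.2]])  # impact of each common nuisance on the POI
    var_common = (jac @ (cov @ jac.T))[0][0]   # 0.122

    total_impact = 0.6                 # total uncertainty from the fit
    impact_others = np.sqrt(total_impact**2 - var_common)
    print(round(impact_others, 3))     # 0.488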

if args.sort:
if args.sort.endswith("diff"):
logger.debug("Sort impacts")
key = args.sort.replace("_diff","")
df[f"{key}_diff"] = df[key] - df[f"{key}_ref"]
elif args.sort.endswith("both"):
key = args.sort.replace("_both","")
if args.ascending:
df[f"{key}_both"] = df[[key, f"{key}_ref"]].max(axis=1)
else:
df[f"{key}_both"] = df[[key, f"{key}_ref"]].min(axis=1)

df = df.sort_values(by=args.sort, ascending=args.ascending)

df = df.fillna(0)

logger.debug("Make plots")
if args.output_mode == "interactive":
app.layout = html.Div([
dcc.Input(
@@ -492,9 +504,16 @@ def producePlots(fitresult, args, poi, group=False, normalize=False, fitresult_r
producePlots(fitresult, args, None, fitresult_ref=fitresult_ref)
exit()

- pois = combinetf_input.get_poi_names(fitresult, poi_type=None)
+ if args.poi:
+ pois = [args.poi]
+ else:
+ pois = combinetf_input.get_poi_names(fitresult, poi_type=args.poiType)

for poi in pois:
logger.debug(f"Now at {poi}")
if args.mode in ["both", "ungrouped"]:
logger.debug(f"Make impact per nuisance")
producePlots(fitresult, args, poi, fitresult_ref=fitresult_ref)
if args.mode in ["both", "group"]:
- producePlots(fitresult, args, poi, group=True, fitresult_ref=fitresult_ref)
+ logger.debug(f"Make impact my group")
+ producePlots(fitresult, args, poi, group=True, fitresult_ref=fitresult_ref, grouping=grouping)
