Update datacardMaker to make IM scalings #434

Merged: 16 commits, Dec 10, 2024
Changes from 7 commits
114 changes: 63 additions & 51 deletions analysis/topeft_run2/datacards_post_processing.py
@@ -1,7 +1,7 @@
import os
import shutil
import argparse

import json

# This script does some basic checks of the cards and templates produced by the `make_cards.py` script.
# - It can also parse the condor log files and dump a summary of their contents
# - Additionally, it can grab the right set of ptz and lj0pt templates (for the right categories) used in TOP-22-006
@@ -17,50 +17,49 @@
# The list of ptz and lj0pt we choose to use in each category for TOP-22-006
TOP22006_CATEGORIES = [

"ttx_multileptons-3l_onZ_1b_2j_ptz",
"ttx_multileptons-3l_onZ_1b_3j_ptz",
"ttx_multileptons-3l_onZ_1b_4j_ptz",
"ttx_multileptons-3l_onZ_1b_5j_ptz",
"ttx_multileptons-3l_onZ_2b_4j_ptz",
"ttx_multileptons-3l_onZ_2b_5j_ptz",

"ttx_multileptons-2lss_4t_m_4j_lj0pt",
"ttx_multileptons-2lss_4t_m_5j_lj0pt",
"ttx_multileptons-2lss_4t_m_6j_lj0pt",
"ttx_multileptons-2lss_4t_m_7j_lj0pt",
"ttx_multileptons-2lss_4t_p_4j_lj0pt",
"ttx_multileptons-2lss_4t_p_5j_lj0pt",
"ttx_multileptons-2lss_4t_p_6j_lj0pt",
"ttx_multileptons-2lss_4t_p_7j_lj0pt",
"ttx_multileptons-2lss_m_4j_lj0pt",
"ttx_multileptons-2lss_m_5j_lj0pt",
"ttx_multileptons-2lss_m_6j_lj0pt",
"ttx_multileptons-2lss_m_7j_lj0pt",
"ttx_multileptons-2lss_p_4j_lj0pt",
"ttx_multileptons-2lss_p_5j_lj0pt",
"ttx_multileptons-2lss_p_6j_lj0pt",
"ttx_multileptons-2lss_p_7j_lj0pt",
"ttx_multileptons-3l_m_offZ_1b_2j_lj0pt",
"ttx_multileptons-3l_m_offZ_1b_3j_lj0pt",
"ttx_multileptons-3l_m_offZ_1b_4j_lj0pt",
"ttx_multileptons-3l_m_offZ_1b_5j_lj0pt",
"ttx_multileptons-3l_m_offZ_2b_2j_lj0pt",
"ttx_multileptons-3l_m_offZ_2b_3j_lj0pt",
"ttx_multileptons-3l_m_offZ_2b_4j_lj0pt",
"ttx_multileptons-3l_m_offZ_2b_5j_lj0pt",
"ttx_multileptons-3l_onZ_2b_2j_lj0pt",
"ttx_multileptons-3l_onZ_2b_3j_lj0pt",
"ttx_multileptons-3l_p_offZ_1b_2j_lj0pt",
"ttx_multileptons-3l_p_offZ_1b_3j_lj0pt",
"ttx_multileptons-3l_p_offZ_1b_4j_lj0pt",
"ttx_multileptons-3l_p_offZ_1b_5j_lj0pt",
"ttx_multileptons-3l_p_offZ_2b_2j_lj0pt",
"ttx_multileptons-3l_p_offZ_2b_3j_lj0pt",
"ttx_multileptons-3l_p_offZ_2b_4j_lj0pt",
"ttx_multileptons-3l_p_offZ_2b_5j_lj0pt",
"ttx_multileptons-4l_2j_lj0pt",
"ttx_multileptons-4l_3j_lj0pt",
"ttx_multileptons-4l_4j_lj0pt",
"2lss_4t_m_4j_lj0pt",
"2lss_4t_m_5j_lj0pt",
"2lss_4t_m_6j_lj0pt",
"2lss_4t_m_7j_lj0pt",
"2lss_4t_p_4j_lj0pt",
"2lss_4t_p_5j_lj0pt",
"2lss_4t_p_6j_lj0pt",
"2lss_4t_p_7j_lj0pt",
"2lss_m_4j_lj0pt",
"2lss_m_5j_lj0pt",
"2lss_m_6j_lj0pt",
"2lss_m_7j_lj0pt",
"2lss_p_4j_lj0pt",
"2lss_p_5j_lj0pt",
"2lss_p_6j_lj0pt",
"2lss_p_7j_lj0pt",
"3l_m_offZ_1b_2j_lj0pt",
"3l_m_offZ_1b_3j_lj0pt",
"3l_m_offZ_1b_4j_lj0pt",
"3l_m_offZ_1b_5j_lj0pt",
"3l_m_offZ_2b_2j_lj0pt",
"3l_m_offZ_2b_3j_lj0pt",
"3l_m_offZ_2b_4j_lj0pt",
"3l_m_offZ_2b_5j_lj0pt",
"3l_onZ_1b_2j_ptz",
"3l_onZ_1b_3j_ptz",
"3l_onZ_1b_4j_ptz",
"3l_onZ_1b_5j_ptz",
"3l_onZ_2b_2j_lj0pt",
"3l_onZ_2b_3j_lj0pt",
"3l_onZ_2b_4j_ptz",
"3l_onZ_2b_5j_ptz",
"3l_p_offZ_1b_2j_lj0pt",
"3l_p_offZ_1b_3j_lj0pt",
"3l_p_offZ_1b_4j_lj0pt",
"3l_p_offZ_1b_5j_lj0pt",
"3l_p_offZ_2b_2j_lj0pt",
"3l_p_offZ_2b_3j_lj0pt",
"3l_p_offZ_2b_4j_lj0pt",
"3l_p_offZ_2b_5j_lj0pt",
"4l_2j_lj0pt",
"4l_3j_lj0pt",
"4l_4j_lj0pt",
]

# Return list of lines in a file
@@ -90,6 +89,9 @@

###### Print out general info ######

with open(os.path.join(args.datacards_path,'scalings-preselect.json'), 'r') as file:
scalings_content = json.load(file)

# Count the number of text data cards and root templates
n_text_cards = 0
n_root_templates = 0
@@ -135,8 +137,6 @@
for line in lines_from_condor_out_to_print:
print(f"\t\t* In {line[0]}: {line[1]}")



####### Copy the TOP-22-006 relevant files to their own dir ######

# Grab the ptz-lj0pt cards we want for TOP-22-006, copy into a dir
@@ -148,13 +148,25 @@
print(f"\nCopying TOP-22-006 relevant files to {ptzlj0pt_path}...")
for fname in datacard_files:
file_name_strip_ext = os.path.splitext(fname)[0]
-    if file_name_strip_ext in TOP22006_CATEGORIES:
-        shutil.copyfile(os.path.join(args.datacards_path,fname),os.path.join(ptzlj0pt_path,fname))
-        if fname.endswith(".txt"): n_txt += 1
-        if fname.endswith(".root"): n_root += 1
+    for file in TOP22006_CATEGORIES:
+        if file in file_name_strip_ext:
+            shutil.copyfile(os.path.join(args.datacards_path,fname),os.path.join(ptzlj0pt_path,fname))
+            if fname.endswith(".txt"): n_txt += 1
+            if fname.endswith(".root"): n_root += 1

# Also copy the selectedWCs.txt file
shutil.copyfile(os.path.join(args.datacards_path,"selectedWCs.txt"),os.path.join(ptzlj0pt_path,"selectedWCs.txt"))

# Keep only the TOP-22-006 channels, renaming each one to its positional
# label in the combined card ("ch1", "ch2", ...) via its 1-based index
filtered_scalings = []
for item in scalings_content:
    channel_name = item.get("channel")
    if channel_name in TOP22006_CATEGORIES:
        item["channel"] = "ch" + str(TOP22006_CATEGORIES.index(channel_name) + 1)
        filtered_scalings.append(item)
scalings_content = filtered_scalings

with open(os.path.join(ptzlj0pt_path, 'scalings.json'), 'w') as file:
json.dump(scalings_content, file, indent=4)

# Check that we got the expected number and print what we learn
print(f"\tNumber of text templates copied: {n_txt}")
print(f"\tNumber of root templates copied: {n_txt}")
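Note on the copy loop above: the entries in TOP22006_CATEGORIES no longer carry the "ttx_multileptons-" filename prefix, so the loop now matches each category as a substring of the stripped filename. A minimal sketch of that matching, using an illustrative filename:

import os

# Illustrative only: cards on disk keep the "ttx_multileptons-" prefix,
# while TOP22006_CATEGORIES now stores just the category suffix.
fname = "ttx_multileptons-3l_onZ_1b_2j_ptz.txt"
file_name_strip_ext = os.path.splitext(fname)[0]  # drops ".txt"
assert "3l_onZ_1b_2j_ptz" in file_name_strip_ext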
2 changes: 1 addition & 1 deletion analysis/topeft_run2/fullR2_run.sh
@@ -5,7 +5,7 @@ OUT_NAME="example_name"

# Build the run command for filling SR histos
CFGS="../../input_samples/cfgs/mc_signal_samples_NDSkim.cfg,../../input_samples/cfgs/mc_background_samples_NDSkim.cfg,../../input_samples/cfgs/data_samples_NDSkim.cfg"
OPTIONS="--hist-list ana --skip-cr --do-systs -s 50000 --do-np -o $OUT_NAME" # For analysis
OPTIONS="--hist-list ana --skip-cr --do-systs -s 50000 --wc-list cQQ1 cQei cQl3i cQlMi cQq11 cQq13 cQq81 cQq83 cQt1 cQt8 cbW cpQ3 cpQM cpt cptb ctG ctW ctZ ctei ctlSi ctlTi ctli ctp ctq1 ctq8 ctt1 --do-np -o $OUT_NAME" # For analysis

# Build the run command for filling CR histos
#CFGS="../../input_samples/cfgs/mc_signal_samples_NDSkim.cfg,../../input_samples/cfgs/mc_background_samples_NDSkim.cfg,../../input_samples/cfgs/mc_background_samples_cr_NDSkim.cfg,../../input_samples/cfgs/data_samples_NDSkim.cfg"
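The explicit --wc-list above pins the order in which the WCs are filled into the histograms, which is also the order the scalings rely on downstream. A hypothetical snippet (not part of the repo) showing how the same ordered list can be reused for the new --wc-scalings option of make_cards.py:

# Hypothetical glue: keep a single ordered WC list and pass it to both steps
# so the scalings.json columns line up with the histogram WC ordering.
WC_LIST = [
    "cQQ1", "cQei", "cQl3i", "cQlMi", "cQq11", "cQq13", "cQq81", "cQq83",
    "cQt1", "cQt8", "cbW", "cpQ3", "cpQM", "cpt", "cptb", "ctG", "ctW",
    "ctZ", "ctei", "ctlSi", "ctlTi", "ctli", "ctp", "ctq1", "ctq8", "ctt1",
]
print("--wc-list " + " ".join(WC_LIST))      # run_analysis.py (via this script)
print("--wc-scalings " + " ".join(WC_LIST))  # make_cards.py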
9 changes: 9 additions & 0 deletions analysis/topeft_run2/make_cards.py
@@ -171,6 +171,7 @@ def main():
parser.add_argument("--chunks","-n",default=1,help="The number of channels each condor job should process")
parser.add_argument("--keep-negative-bins",action="store_true",help="Don't crop negative bins")
parser.add_argument("--wc-vals", default="",action="store", nargs="+", help="Specify the corresponding wc values to set for the wc list")
parser.add_argument("--wc-scalings", default=[],action="extend",nargs="+",help="Specify a list of wc ordering for scalings.json")

args = parser.parse_args()
pkl_file = args.pkl_file
@@ -189,6 +190,7 @@
verbose = args.verbose
wc_vals = args.wc_vals

wc_scalings = args.wc_scalings
select_only = args.select_only
use_selected = args.use_selected

@@ -212,6 +214,7 @@
"verbose": verbose,
"year_lst": years,
"wc_vals": wc_vals,
"wc_scalings": wc_scalings,
}

if out_dir != "." and not os.path.exists(out_dir):
@@ -285,6 +288,12 @@
run_condor(dc,pkl_file,out_dir,dists,ch_lst,chunks)
else:
run_local(dc,dists,ch_lst,selected_wcs, not args.keep_negative_bins, wcs_dict)

# make pre-selection scalings.json
print("Making scalings-preselect.json file...")
with open(os.path.join(out_dir,"scalings-preselect.json"),"w") as f:
json.dump(dc.scalings, f, indent=4)

dt = time.time() - tic
print(f"Total Time: {dt:.2f} s")
print("Finished!")
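The scalings-preselect.json written above serializes dc.scalings, whose entries are built by make_scalings_json in datacard_tools.py (below). An illustrative entry, with made-up coefficients and a single WC for brevity:

# Illustrative shape only; real entries carry one coefficient row per bin,
# each of length (n_wc+1)*(n_wc+2)/2 for the quadratic EFT parameterization.
example_entry = {
    "channel": "3l_onZ_1b_2j_ptz",                # "<channel>_<kinematic distribution>"
    "process": "ttll_sm",                         # must match a process in the datacard
    "parameters": ["cSM[1]", "ctG[0,-0.8,0.8]"],  # "name[initial,lo,hi]" via format_wc
    "scaling": [[1.0, 0.12, 0.03]],               # per-bin terms; underflow bin excluded
}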
1 change: 1 addition & 0 deletions analysis/topeft_run2/run_analysis.py
@@ -223,6 +223,7 @@ def LoadJsonToSampleName(jsonFile, prefix):

# Extract the list of all WCs, as long as we haven't already specified one.
if len(wc_lst) == 0:
print("WARNING: specify an ordered list of wcs for scalings, otherwise the order would be arbitrary.")
for k in samplesdict.keys():
for wc in samplesdict[k]['WCnames']:
if wc not in wc_lst:
37 changes: 35 additions & 2 deletions topeft/modules/datacard_tools.py
@@ -296,6 +296,12 @@
self.coeffs = kwargs.pop("wcs",[])
self.use_real_data = kwargs.pop("unblind",False)
self.verbose = kwargs.pop("verbose",True)
self.wc_scalings = kwargs.pop("wc_scalings",[])
self.scalings = []

# get wc ranges from json
with open(topeft_path("params/wc_ranges.json"), "r") as wc_ranges_json:
self.wc_ranges = json.load(wc_ranges_json)

if self.year_lst:
for yr in self.year_lst:
@@ -763,6 +769,25 @@
print(f"WC Selection Time: {dt:.2f} s")
return selected_wcs

def make_scalings_json(self,scalings_json,ch,km_dist,p,wc_names,scalings):
scalings = scalings.tolist()
scalings_json.append(
{
"channel": ch + "_" + str(km_dist),
"process": p + "_sm", # NOTE: needs to be in the datacard
"parameters": ["cSM[1]"]
+ [self.format_wc(wcname) for wcname in wc_names],
"scaling":
scalings[1:] # exclude underflow bin
,
}
)
return scalings_json

def format_wc(self,wcname):
lo, hi = self.wc_ranges[wcname]
return "%s[0,%.1f,%.1f]" % (wcname, lo, hi)

def analyze(self,km_dist,ch,selected_wcs, crop_negative_bins, wcs_dict):
""" Handles the EFT decomposition and the actual writing of the ROOT and text datacard files."""
if not km_dist in self.hists:
@@ -829,6 +854,7 @@
raise RuntimeError("filling obs data more than once!")
for sp_key,arr in data_sm.items():
data_obs += arr
decomposed_templates = {k: v for k, v in decomposed_templates.items() if k == 'sm'}
for base,v in decomposed_templates.items():
proc_name = f"{p}_{base}"
col_width = max(len(proc_name),col_width)
@@ -930,6 +956,14 @@
if p == "tllq" or p == "tHq":
# Handle the 'missing_parton' uncertainty
pass
# obtain the scalings for the scalings.json file
if p in ("tHq", "tllq", "ttH", "ttll", "ttlnu", "tttt"):
if self.wc_scalings:
scalings = h[{'channel':ch,'process':p,'systematic':'nominal'}].make_scaling(self.wc_scalings)
self.scalings_json = self.make_scalings_json(self.scalings,ch,km_dist,p,self.wc_scalings,scalings)

else:
scalings = h[{'channel':ch,'process':p,'systematic':'nominal'}].make_scaling()
self.scalings_json = self.make_scalings_json(self.scalings,ch,km_dist,p,h.wc_names,scalings)
f["data_obs"] = to_hist(data_obs,"data_obs")

line_break = "##----------------------------------\n"
@@ -1127,8 +1161,7 @@
print(f"\tDecompose Time: {dt:.2f} s")
print(f"\tTotal terms: {terms}")

return r

if __name__ == '__main__':
fpath = topeft_path("../analysis/topEFT/histos/may18_fullRun2_withSys_anatest08_np.pkl.gz")
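In the datacard_tools.py changes above, format_wc renders each entry of the new wc_ranges.json (below) as a Combine-style parameter spec of the form name[initial,lo,hi]. A minimal standalone sketch of the same formatting, using the ctG range from that file:

# Standalone sketch of format_wc, reusing the ctG range from wc_ranges.json.
wc_ranges = {"ctG": [-0.8, 0.8]}

def format_wc(wcname):
    lo, hi = wc_ranges[wcname]
    return "%s[0,%.1f,%.1f]" % (wcname, lo, hi)

assert format_wc("ctG") == "ctG[0,-0.8,0.8]"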
28 changes: 28 additions & 0 deletions topeft/params/wc_ranges.json
@@ -0,0 +1,28 @@
{
"cQQ1": [-6.0, 6.0],
"cQei": [-4.0, 4.0],
"cQl3i": [-5.5, 5.5],
"cQlMi": [-4.0, 4.0],
"cQq11": [-0.7, 0.7],
"cQq13": [-0.35, 0.35],
"cQq81": [-1.7, 1.5],
"cQq83": [-0.6, 0.6],
"cQt1": [-6.0, 6.0],
"cQt8": [-10.0, 10.0],
"cbW": [-3.0, 3.0],
"cpQ3": [-4.0, 4.0],
"cpQM": [-15.0, 20.0],
"cpt": [-15.0, 15.0],
"cptb": [-9.0, 9.0],
"ctG": [-0.8, 0.8],
"ctW": [-1.5, 1.5],
"ctZ": [-2.0, 2.0],
"ctei": [-4.0, 4.0],
"ctlSi": [-5.0, 5.0],
"ctlTi": [-0.9, 0.9],
"ctli": [-4.0, 4.0],
"ctp": [-15.0, 40.0],
"ctq1": [-0.6, 0.6],
"ctq8": [-1.4, 1.4],
"ctt1": [-2.6, 2.6]
}
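Because format_wc indexes this map directly, asking for a WC that has no entry here fails with a KeyError only once the datacards are being written. A small hypothetical pre-flight check (path assumed from the repo layout above) can surface that earlier:

import json

# Hypothetical pre-flight check, assuming the repo-relative path shown above.
with open("topeft/params/wc_ranges.json") as f:
    wc_ranges = json.load(f)

requested = ["ctG", "ctW", "ctZ"]  # e.g. the list passed to --wc-scalings
missing = [wc for wc in requested if wc not in wc_ranges]
if missing:
    raise SystemExit(f"No WC range defined for: {missing}")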