Update datacardMaker to make IM scalings #434

Merged: 16 commits, Dec 10, 2024
Changes from 3 commits

113 changes: 62 additions & 51 deletions analysis/topeft_run2/datacards_post_processing.py
@@ -1,7 +1,7 @@
import os
import shutil
import argparse

import json
# This script does some basic checks of the cards and templates produced by the `make_cards.py` script.
# - It can also parse the condor log files and dump a summary of their contents
# - Additionally, it can grab the right set of ptz and lj0pt templates (for the right categories) used in TOP-22-006
@@ -17,50 +17,49 @@
# The list of ptz and lj0pt templates we choose to use in each category for TOP-22-006
TOP22006_CATEGORIES = [

"ttx_multileptons-3l_onZ_1b_2j_ptz",
"ttx_multileptons-3l_onZ_1b_3j_ptz",
"ttx_multileptons-3l_onZ_1b_4j_ptz",
"ttx_multileptons-3l_onZ_1b_5j_ptz",
"ttx_multileptons-3l_onZ_2b_4j_ptz",
"ttx_multileptons-3l_onZ_2b_5j_ptz",

"ttx_multileptons-2lss_4t_m_4j_lj0pt",
"ttx_multileptons-2lss_4t_m_5j_lj0pt",
"ttx_multileptons-2lss_4t_m_6j_lj0pt",
"ttx_multileptons-2lss_4t_m_7j_lj0pt",
"ttx_multileptons-2lss_4t_p_4j_lj0pt",
"ttx_multileptons-2lss_4t_p_5j_lj0pt",
"ttx_multileptons-2lss_4t_p_6j_lj0pt",
"ttx_multileptons-2lss_4t_p_7j_lj0pt",
"ttx_multileptons-2lss_m_4j_lj0pt",
"ttx_multileptons-2lss_m_5j_lj0pt",
"ttx_multileptons-2lss_m_6j_lj0pt",
"ttx_multileptons-2lss_m_7j_lj0pt",
"ttx_multileptons-2lss_p_4j_lj0pt",
"ttx_multileptons-2lss_p_5j_lj0pt",
"ttx_multileptons-2lss_p_6j_lj0pt",
"ttx_multileptons-2lss_p_7j_lj0pt",
"ttx_multileptons-3l_m_offZ_1b_2j_lj0pt",
"ttx_multileptons-3l_m_offZ_1b_3j_lj0pt",
"ttx_multileptons-3l_m_offZ_1b_4j_lj0pt",
"ttx_multileptons-3l_m_offZ_1b_5j_lj0pt",
"ttx_multileptons-3l_m_offZ_2b_2j_lj0pt",
"ttx_multileptons-3l_m_offZ_2b_3j_lj0pt",
"ttx_multileptons-3l_m_offZ_2b_4j_lj0pt",
"ttx_multileptons-3l_m_offZ_2b_5j_lj0pt",
"ttx_multileptons-3l_onZ_2b_2j_lj0pt",
"ttx_multileptons-3l_onZ_2b_3j_lj0pt",
"ttx_multileptons-3l_p_offZ_1b_2j_lj0pt",
"ttx_multileptons-3l_p_offZ_1b_3j_lj0pt",
"ttx_multileptons-3l_p_offZ_1b_4j_lj0pt",
"ttx_multileptons-3l_p_offZ_1b_5j_lj0pt",
"ttx_multileptons-3l_p_offZ_2b_2j_lj0pt",
"ttx_multileptons-3l_p_offZ_2b_3j_lj0pt",
"ttx_multileptons-3l_p_offZ_2b_4j_lj0pt",
"ttx_multileptons-3l_p_offZ_2b_5j_lj0pt",
"ttx_multileptons-4l_2j_lj0pt",
"ttx_multileptons-4l_3j_lj0pt",
"ttx_multileptons-4l_4j_lj0pt",
"2lss_4t_m_4j_lj0pt",
"2lss_4t_m_5j_lj0pt",
"2lss_4t_m_6j_lj0pt",
"2lss_4t_m_7j_lj0pt",
"2lss_4t_p_4j_lj0pt",
"2lss_4t_p_5j_lj0pt",
"2lss_4t_p_6j_lj0pt",
"2lss_4t_p_7j_lj0pt",
"2lss_m_4j_lj0pt",
"2lss_m_5j_lj0pt",
"2lss_m_6j_lj0pt",
"2lss_m_7j_lj0pt",
"2lss_p_4j_lj0pt",
"2lss_p_5j_lj0pt",
"2lss_p_6j_lj0pt",
"2lss_p_7j_lj0pt",
"3l_m_offZ_1b_2j_lj0pt",
"3l_m_offZ_1b_3j_lj0pt",
"3l_m_offZ_1b_4j_lj0pt",
"3l_m_offZ_1b_5j_lj0pt",
"3l_m_offZ_2b_2j_lj0pt",
"3l_m_offZ_2b_3j_lj0pt",
"3l_m_offZ_2b_4j_lj0pt",
"3l_m_offZ_2b_5j_lj0pt",
"3l_onZ_1b_2j_ptz",
"3l_onZ_1b_3j_ptz",
"3l_onZ_1b_4j_ptz",
"3l_onZ_1b_5j_ptz",
"3l_onZ_2b_2j_lj0pt",
"3l_onZ_2b_3j_lj0pt",
"3l_onZ_2b_4j_ptz",
"3l_onZ_2b_5j_ptz",
"3l_p_offZ_1b_2j_lj0pt",
"3l_p_offZ_1b_3j_lj0pt",
"3l_p_offZ_1b_4j_lj0pt",
"3l_p_offZ_1b_5j_lj0pt",
"3l_p_offZ_2b_2j_lj0pt",
"3l_p_offZ_2b_3j_lj0pt",
"3l_p_offZ_2b_4j_lj0pt",
"3l_p_offZ_2b_5j_lj0pt",
"4l_2j_lj0pt",
"4l_3j_lj0pt",
"4l_4j_lj0pt",
]

# Return list of lines in a file
@@ -90,6 +89,9 @@ def main():

###### Print out general info ######

with open('scalings-preselect.json', 'r') as file:
scalings_content = json.load(file)

# Count the number of text data cards and root templates
n_text_cards = 0
n_root_templates = 0
@@ -135,8 +137,6 @@ def main():
for line in lines_from_condor_out_to_print:
print(f"\t\t* In {line[0]}: {line[1]}")



####### Copy the TOP-22-006 relevant files to their own dir ######

# Grab the ptz-lj0pt cards we want for TOP-22-006, copy into a dir
@@ -148,13 +148,24 @@
print(f"\nCopying TOP-22-006 relevant files to {ptzlj0pt_path}...")
for fname in datacard_files:
file_name_strip_ext = os.path.splitext(fname)[0]
if file_name_strip_ext in TOP22006_CATEGORIES:
shutil.copyfile(os.path.join(args.datacards_path,fname),os.path.join(ptzlj0pt_path,fname))
if fname.endswith(".txt"): n_txt += 1
if fname.endswith(".root"): n_root += 1
for file in TOP22006_CATEGORIES:
if file in file_name_strip_ext:
shutil.copyfile(os.path.join(args.datacards_path,fname),os.path.join(ptzlj0pt_path,fname))
if fname.endswith(".txt"): n_txt += 1
if fname.endswith(".root"): n_root += 1
# Also copy the selectedWCs.txt file
shutil.copyfile(os.path.join(args.datacards_path,"selectedWCs.txt"),os.path.join(ptzlj0pt_path,"selectedWCs.txt"))

selected_scalings = []
for item in scalings_content:
    channel_name = item.get("channel")
    if channel_name in TOP22006_CATEGORIES:
        # Rename to chN (1-based index in the category list) so the entry matches
        # the channel naming in the combined card
        item["channel"] = "ch" + str(TOP22006_CATEGORIES.index(channel_name) + 1)
        selected_scalings.append(item)
with open('scalings.json', 'w') as fout:
    json.dump(selected_scalings, fout, indent=4)

# Check that we got the expected number and print what we learn
print(f"\tNumber of text templates copied: {n_txt}")
print(f"\tNumber of root templates copied: {n_txt}")
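
For reference, a minimal self-contained sketch of the renaming step above, assuming each entry of scalings-preselect.json is a dict with at least a "channel" key (as the loop implies); the other keys and all values here are illustrative only:

import json

# Hypothetical input mimicking what make_cards.py dumps; values are made up
scalings_preselect = [
    {"channel": "3l_onZ_1b_2j_ptz", "process": "ttZ", "scaling": [[1.0]]},
    {"channel": "2lss_CR_1j_ptz", "process": "ttZ", "scaling": [[1.0]]},  # not a TOP-22-006 category
]
categories = ["2lss_4t_m_4j_lj0pt", "3l_onZ_1b_2j_ptz"]  # truncated stand-in for TOP22006_CATEGORIES

selected = []
for item in scalings_preselect:
    ch = item.get("channel")
    if ch in categories:
        # chN presumably matches the channel names combineCards.py assigns (ch1, ch2, ...)
        item["channel"] = f"ch{categories.index(ch) + 1}"
        selected.append(item)

with open("scalings.json", "w") as fout:
    json.dump(selected, fout, indent=4)
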
2 changes: 1 addition & 1 deletion analysis/topeft_run2/fullR2_run.sh
@@ -5,7 +5,7 @@ OUT_NAME="example_name"

# Build the run command for filling SR histos
CFGS="../../input_samples/cfgs/mc_signal_samples_NDSkim.cfg,../../input_samples/cfgs/mc_background_samples_NDSkim.cfg,../../input_samples/cfgs/data_samples_NDSkim.cfg"
OPTIONS="--hist-list ana --skip-cr --do-systs -s 50000 --do-np -o $OUT_NAME" # For analysis
OPTIONS="--hist-list ana --skip-cr --do-systs -s 50000 --wc-list cQQ1 cQei cQl3i cQlMi cQq11 cQq13 cQq81 cQq83 cQt1 cQt8 cbW cpQ3 cpQM cpt cptb ctG ctW ctZ ctei ctlSi ctlTi ctli ctp ctq1 ctq8 ctt1 --do-np -o $OUT_NAME" # For analysis

# Build the run command for filling CR histos
#CFGS="../../input_samples/cfgs/mc_signal_samples_NDSkim.cfg,../../input_samples/cfgs/mc_background_samples_NDSkim.cfg,../../input_samples/cfgs/mc_background_samples_cr_NDSkim.cfg,../../input_samples/cfgs/data_samples_NDSkim.cfg"
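
Pinning the WC list in OPTIONS fixes the ordering of the Wilson coefficients, which the scalings inherit; without it, the order depends on sample iteration (see the warning added in run_analysis.py below). A hedged consistency check, assuming each scalings.json entry carries a "parameters" list in that order (a key not shown in this diff):

import json

wc_list = ("cQQ1 cQei cQl3i cQlMi cQq11 cQq13 cQq81 cQq83 cQt1 cQt8 cbW cpQ3 "
           "cpQM cpt cptb ctG ctW ctZ ctei ctlSi ctlTi ctli ctp ctq1 ctq8 ctt1").split()

with open("scalings.json") as f:
    for entry in json.load(f):
        # "parameters" is an assumed schema key; adjust to the real one
        assert entry.get("parameters") == wc_list, f"unexpected WC order in {entry.get('channel')}"
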
2 changes: 2 additions & 0 deletions analysis/topeft_run2/make_cards.py
@@ -68,6 +68,8 @@ def run_local(dc,km_dists,channels,selected_wcs, crop_negative_bins, wcs_dict):
for ch in matched_chs:
r = dc.analyze(km_dist,ch,selected_wcs, crop_negative_bins, wcs_dict)

with open("scalings-preselect.json", "w") as fout:
json.dump(dc.scalings, fout, indent=4)
# VERY IMPORTANT:
# This setup assumes the output directory is mounted on the remote condor machines
# Note:
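
The dump relies on a json import in make_cards.py and on dc.scalings having been filled during dc.analyze(). For orientation, one plausible entry of the resulting scalings-preselect.json, inferred from how datacards_post_processing.py consumes it; every key except "channel" is an assumption:

# Illustrative only; not the actual topeft schema
example_entry = {
    "channel": "3l_onZ_1b_2j_ptz",                  # category name before the chN renaming
    "process": "ttZ",                               # assumed key
    "parameters": ["ctG", "ctZ"],                   # assumed: WCs in --wc-list order
    "scaling": [[1.0, 0.1, 0.2, 0.01, 0.0, 0.02]],  # assumed: per-bin quadratic terms (1, c1, c2, c1^2, c1*c2, c2^2)
}
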
1 change: 1 addition & 0 deletions analysis/topeft_run2/run_analysis.py
@@ -223,6 +223,7 @@ def LoadJsonToSampleName(jsonFile, prefix):

# Extract the list of all WCs, as long as we haven't already specified one.
if len(wc_lst) == 0:
print("WARNING: specify an ordered list of wcs for scalings, otherwise the order would be arbitrary.")
for k in samplesdict.keys():
for wc in samplesdict[k]['WCnames']:
if wc not in wc_lst:
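
If a deterministic fallback is wanted when no --wc-list is given, sorting would remove the arbitrariness the warning describes. A minimal sketch of the idea, with a hypothetical stand-in for samplesdict (this is not part of the PR):

# Hypothetical stand-in; only the 'WCnames' field matters here
samplesdict = {
    "ttZ_sample": {"WCnames": ["ctZ", "ctG"]},
    "ttW_sample": {"WCnames": ["ctG", "cQq81"]},
}

wc_lst = []
for k in sorted(samplesdict.keys()):  # sorted keys -> deterministic scan
    for wc in samplesdict[k]["WCnames"]:
        if wc not in wc_lst:
            wc_lst.append(wc)
wc_lst.sort()                          # fixed order: ['cQq81', 'ctG', 'ctZ']
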
10 changes: 8 additions & 2 deletions topeft/modules/datacard_tools.py
@@ -296,6 +296,7 @@ def __init__(self,pkl_path,**kwargs):
self.coeffs = kwargs.pop("wcs",[])
self.use_real_data = kwargs.pop("unblind",False)
self.verbose = kwargs.pop("verbose",True)
self.scalings = []

if self.year_lst:
for yr in self.year_lst:
@@ -829,6 +830,7 @@ def analyze(self,km_dist,ch,selected_wcs, crop_negative_bins, wcs_dict):
raise RuntimeError("filling obs data more than once!")
for sp_key,arr in data_sm.items():
data_obs += arr
# Keep only the 'sm' template; with IM scalings the EFT dependence goes into scalings.json instead
decomposed_templates = {k: v for k, v in decomposed_templates.items() if k == 'sm'}
for base,v in decomposed_templates.items():
proc_name = f"{p}_{base}"
col_width = max(len(proc_name),col_width)
@@ -930,6 +932,11 @@ def analyze(self,km_dist,ch,selected_wcs, crop_negative_bins, wcs_dict):
if p == "tllq" or p == "tHq":
# Handle the 'missing_parton' uncertainty
pass

# Obtain the scalings content for scalings.json (skip the data-driven fakes, which carry no EFT dependence)
if p != "fakes":
self.scalings = h.make_scalings_content(self.scalings,ch,km_dist,p,h.wc_names,h.make_scalings(h,ch,p))

f["data_obs"] = to_hist(data_obs,"data_obs")

line_break = "##----------------------------------\n"
@@ -1127,8 +1134,7 @@ def decompose(self,h,sumw2,wcs,vals={}):
print(f"\tDecompose Time: {dt:.2f} s")
print(f"\tTotal terms: {terms}")

return r

return r

if __name__ == '__main__':
fpath = topeft_path("../analysis/topEFT/histos/may18_fullRun2_withSys_anatest08_np.pkl.gz")
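
As a mental model of the accumulation above: self.scalings is expected to grow by one entry per (channel, distribution, process), which is what lets datacards_post_processing.py filter and rename by channel later. A hedged sketch of that contract; the signature mirrors the call site, but the body and key names are assumptions, not the actual topeft API:

def make_scalings_content(scalings, ch, km_dist, proc, wc_names, per_bin_terms):
    # Append one entry per (channel, distribution, process); returns the list,
    # matching the self.scalings = ... reassignment at the call site
    scalings.append({
        "channel": f"{ch}_{km_dist}",   # e.g. "3l_onZ_1b_2j" + "_ptz", later mapped to chN
        "process": proc,
        "parameters": list(wc_names),
        "scaling": per_bin_terms,       # per-bin quadratic coefficients in the WCs
    })
    return scalings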