Merge pull request #377 from davidwalter2/240110_AppendHistmakerSupport

Add support for appending histmaker analysis output to existing file

davidwalter2 authored Jan 11, 2024
2 parents 20dbb76 + a219a1c commit 2b2950d
Showing 14 changed files with 96 additions and 23 deletions.
2 changes: 1 addition & 1 deletion scripts/histmakers/mw_lowPU.py
@@ -475,4 +475,4 @@ def build_graph_cutFlow(df, dataset):
 scale_to_data(resultdict)
 aggregate_groups(datasets, resultdict, groups_to_aggregate)

-output_tools.write_analysis_output(resultdict, f"mw_lowPU_{flavor}.hdf5", args, update_name=not args.forceDefaultName)
+output_tools.write_analysis_output(resultdict, f"mw_lowPU_{flavor}.hdf5", args)
2 changes: 1 addition & 1 deletion scripts/histmakers/mw_with_mu_eta_pt.py
@@ -539,4 +539,4 @@ def build_graph(df, dataset):
 scale_to_data(resultdict)
 aggregate_groups(datasets, resultdict, groups_to_aggregate)

-output_tools.write_analysis_output(resultdict, f"{os.path.basename(__file__).replace('py', 'hdf5')}", args, update_name=not args.forceDefaultName)
+output_tools.write_analysis_output(resultdict, f"{os.path.basename(__file__).replace('py', 'hdf5')}", args)
2 changes: 1 addition & 1 deletion scripts/histmakers/mz_dilepton.py
@@ -406,4 +406,4 @@ def build_graph(df, dataset):
 scale_to_data(resultdict)
 aggregate_groups(datasets, resultdict, args.aggregateGroups)

-output_tools.write_analysis_output(resultdict, f"{os.path.basename(__file__).replace('py', 'hdf5')}", args, update_name=not args.forceDefaultName)
+output_tools.write_analysis_output(resultdict, f"{os.path.basename(__file__).replace('py', 'hdf5')}", args)
2 changes: 1 addition & 1 deletion scripts/histmakers/mz_lowPU.py
@@ -352,4 +352,4 @@ def build_graph(df, dataset):
 scale_to_data(resultdict)
 aggregate_groups(datasets, resultdict, args.aggregateGroups)

-output_tools.write_analysis_output(resultdict, f"mz_lowPU_{flavor}.hdf5", args, update_name=not args.forceDefaultName)
+output_tools.write_analysis_output(resultdict, f"mz_lowPU_{flavor}.hdf5", args)
2 changes: 1 addition & 1 deletion scripts/histmakers/mz_wlike_with_mu_eta_pt.py
@@ -332,4 +332,4 @@ def build_graph(df, dataset):
 scale_to_data(resultdict)
 aggregate_groups(datasets, resultdict, args.aggregateGroups)

-output_tools.write_analysis_output(resultdict, f"{os.path.basename(__file__).replace('py', 'hdf5')}", args, update_name=not args.forceDefaultName)
+output_tools.write_analysis_output(resultdict, f"{os.path.basename(__file__).replace('py', 'hdf5')}", args)
4 changes: 2 additions & 2 deletions scripts/histmakers/w_z_gen_dists.py
@@ -264,7 +264,7 @@ def build_graph(df, dataset):
     return results, weightsum

 resultdict = narf.build_and_run(datasets, build_graph)
-output_tools.write_analysis_output(resultdict, f"{os.path.basename(__file__).replace('py', 'hdf5')}", args, update_name=not args.forceDefaultName)
+output_tools.write_analysis_output(resultdict, f"{os.path.basename(__file__).replace('py', 'hdf5')}", args)

 logger.info("computing angular coefficients")
 z_moments = None
@@ -308,4 +308,4 @@ def build_graph(df, dataset):
 if args.useTheoryAgnosticBinning:
     outfname += "_theoryAgnosticBinning"
 outfname += ".hdf5"
-output_tools.write_analysis_output(moments_out, outfname, args, update_name=not args.forceDefaultName)
+output_tools.write_analysis_output(moments_out, outfname, args)
2 changes: 1 addition & 1 deletion scripts/histmakers/w_z_muonresponse.py
@@ -180,5 +180,5 @@ def build_graph(df, dataset):

 resultdict = narf.build_and_run(datasets, build_graph)

-output_tools.write_analysis_output(resultdict, f"{os.path.basename(__file__).replace('py', 'hdf5')}", args, update_name=not args.forceDefaultName)
+output_tools.write_analysis_output(resultdict, f"{os.path.basename(__file__).replace('py', 'hdf5')}", args)

45 changes: 45 additions & 0 deletions scripts/utilities/run_histmakers.sh
@@ -0,0 +1,45 @@
+# Due to memory constraints, run the histmaker multiple times, gradually appending to the output file.
+# Run e.g.: source scripts/utilities/run_histmakers.sh wmass /scratch/$USER/results_histmaker/ nominal --unfolding --genVars ptGen absEtaGen --genBins 32 24 --pt 32 25 57 --noAuxiliaryHistograms
+
+if [[ $# -lt 3 ]]; then
+    echo "Requires at least three arguments: run_histmakers.sh <MODE> <OUTPUT_DIR> <POSTFIX> (<OPTIONAL OPTS>)"
+    exit 1
+fi
+
+MODE=$1
+OUTPUT_DIR=$2
+POSTFIX=$3
+shift
+shift
+shift
+
+if [ "$MODE" == "wmass" ]; then
+    HISTMAKER="mw_with_mu_eta_pt"
+    separateProcs=("WminusmunuPostVFP" "WplusmunuPostVFP" "WminustaunuPostVFP" "WplustaunuPostVFP")
+elif [ "$MODE" == "wlike" ]; then
+    HISTMAKER="mz_wlike_with_mu_eta_pt"
+    separateProcs=("ZmumuPostVFP" "ZtautauPostVFP")
+elif [ "$MODE" == "dilepton" ]; then
+    HISTMAKER="mz_dilepton"
+    separateProcs=("ZmumuPostVFP" "ZtautauPostVFP")
+fi
+
+OUTPUT_FILE=$OUTPUT_DIR/${HISTMAKER}_${POSTFIX}.hdf5
+
+OPTS="--forceDefaultName --postfix $POSTFIX $@"
+
+CMD="python ./scripts/histmakers/${HISTMAKER}.py \
+    -o $OUTPUT_DIR $OPTS --excludeProcs ${separateProcs[@]}"
+# echo $CMD
+eval $CMD
+
+# Processes that are run individually, appending to the common output file
+for proc in "${separateProcs[@]}"; do
+    CMD="python ./scripts/histmakers/${HISTMAKER}.py \
+        --appendOutputFile $OUTPUT_FILE $OPTS --filterProcs $proc"
+    # echo $CMD
+    eval $CMD
+done

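Note: the shell driver above can equivalently be expressed in Python. The sketch below mirrors the wmass branch of the script (same histmaker, options, and process list); the output directory and postfix are placeholders, and the subprocess-based driver itself is illustrative rather than part of this commit.

    # Minimal Python sketch of the incremental histmaker workflow above (wmass mode).
    # Paths and process names are taken from the shell script; this driver is illustrative.
    import subprocess

    histmaker = "./scripts/histmakers/mw_with_mu_eta_pt.py"
    output_dir = "/scratch/results_histmaker"                     # hypothetical output directory
    output_file = f"{output_dir}/mw_with_mu_eta_pt_nominal.hdf5"  # file the later runs append to
    common_opts = ["--forceDefaultName", "--postfix", "nominal"]
    separate_procs = ["WminusmunuPostVFP", "WplusmunuPostVFP",
                      "WminustaunuPostVFP", "WplustaunuPostVFP"]

    # First pass: everything except the memory-heavy processes
    subprocess.run(["python", histmaker, "-o", output_dir, *common_opts,
                    "--excludeProcs", *separate_procs], check=True)

    # One appending pass per memory-heavy process
    for proc in separate_procs:
        subprocess.run(["python", histmaker, "--appendOutputFile", output_file,
                        *common_opts, "--filterProcs", proc], check=True)
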
1 change: 1 addition & 0 deletions utilities/common.py
@@ -178,6 +178,7 @@ def __call__(self, parser, namespace, values, option_string=None):
     parser.add_argument("--onlyMainHistograms", action='store_true', help="Only produce some histograms, skipping (most) systematics to run faster when those are not needed")
     parser.add_argument("--met", type=str, choices=["DeepMETReso", "RawPFMET"], help="MET (DeepMETReso or RawPFMET)", default="DeepMETReso")
     parser.add_argument("-o", "--outfolder", type=str, default="", help="Output folder")
+    parser.add_argument("--appendOutputFile", type=str, default="", help="Append analysis output to specified output file")
     parser.add_argument("-e", "--era", type=str, choices=["2016PreVFP","2016PostVFP", "2017", "2018"], help="Data set to process", default="2016PostVFP")
     parser.add_argument("--nonClosureScheme", type=str, default = "A-only", choices=["none", "A-M-separated", "A-M-combined", "binned", "binned-plus-M", "A-only", "M-only"], help = "source of the Z non-closure nuisances")
     parser.add_argument("--correlatedNonClosureNP", action="store_false", help="disable the de-correlation of Z non-closure nuisance parameters after the jpsi massfit")
20 changes: 14 additions & 6 deletions utilities/io_tools/input_tools.py
@@ -16,6 +16,12 @@

 scetlib_tnp_match_expr = ["^gamma_.*[+|-]\d+", "^b_.*[+|-]\d+", "^s[+|-]\d+", "^h_.*\d+"]

+def load_results_h5py(h5file):
+    if "results" in h5file.keys():
+        return ioutils.pickle_load_h5py(h5file["results"])
+    else:
+        return {k: ioutils.pickle_load_h5py(v) for k,v in h5file.items()}
+
 def read_and_scale_pkllz4(fname, proc, histname, calculate_lumi=False, scale=1):
     with lz4.frame.open(fname) as f:
         results = pickle.load(f)
@@ -24,29 +30,29 @@ def read_and_scale_pkllz4(fname, proc, histname, calculate_lumi=False, scale=1):

 def read_hist_names(fname, proc):
     with h5py.File(fname, "r") as h5file:
-        results = ioutils.pickle_load_h5py(h5file["results"])
+        results = load_results_h5py(h5file)
         if proc not in results:
             raise ValueError(f"Invalid process {proc}! No output found in file {fname}")
         return results[proc]["output"].keys()

 def read_keys(fname):
     with h5py.File(fname, "r") as h5file:
-        results = ioutils.pickle_load_h5py(h5file["results"])
+        results = load_results_h5py(h5file)
         return results.keys()

 def read_xsec(fname, proc):
     with h5py.File(fname, "r") as h5file:
-        results = ioutils.pickle_load_h5py(h5file["results"])
+        results = load_results_h5py(h5file)
         return results[proc]["dataset"]["xsec"]

 def read_sumw(fname, proc):
     with h5py.File(fname, "r") as h5file:
-        results = ioutils.pickle_load_h5py(h5file["results"])
+        results = load_results_h5py(h5file)
         return results[proc]["weight_sum"]

 def read_and_scale(fname, proc, histname, calculate_lumi=False, scale=1, apply_xsec=True):
     with h5py.File(fname, "r") as h5file:
-        results = ioutils.pickle_load_h5py(h5file["results"])
+        results = load_results_h5py(h5file)

     return load_and_scale(results, proc, histname, calculate_lumi, scale, apply_xsec)

@@ -68,7 +74,7 @@ def load_and_scale(res_dict, proc, histname, calculate_lumi=False, scale=1., app

 def read_all_and_scale(fname, procs, histnames, lumi=False):
     h5file = h5py.File(fname, "r")
-    results = ioutils.pickle_load_h5py(h5file["results"])
+    results = load_results_h5py(h5file)

     hists = []
     for histname in histnames:
@@ -417,6 +423,8 @@ def get_metadata(infile):
         results = pickle.load(f)
     elif infile.endswith(".hdf5"):
         h5file = h5py.File(infile, "r")
+        if "meta_info" in h5file.keys():
+            return ioutils.pickle_load_h5py(h5file["meta_info"])
         meta = h5file.get("results", h5file.get("meta", None))
         results = ioutils.pickle_load_h5py(meta) if meta else None
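The new load_results_h5py helper is what keeps old and new files interchangeable for every reader above. A minimal usage sketch, assuming the repo environment (h5py plus narf's ioutils pickling helpers); the file names are hypothetical:

    # Hedged sketch: load_results_h5py on both file layouts.
    import h5py
    from utilities.io_tools import input_tools

    # Old layout: a single top-level "results" entry holding the whole pickled dict.
    with h5py.File("old.hdf5", "r") as f:
        results = input_tools.load_results_h5py(f)  # unpickles f["results"] wholesale

    # New layout: one top-level entry per key (process results, meta_info, ...),
    # written key by key so that later runs can append without rewriting the file.
    with h5py.File("new.hdf5", "r") as f:
        results = input_tools.load_results_h5py(f)  # unpickles each key separately

    # Either way the caller sees the same dict-like object, e.g.
    # results["ZmumuPostVFP"]["output"], results["meta_info"], ...
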
27 changes: 22 additions & 5 deletions utilities/io_tools/output_tools.py
@@ -49,9 +49,8 @@ def writeMetaInfoToRootFile(rtfile, exclude_diff='notebooks', args=None):
         out = ROOT.TNamed(str(key), str(value))
         out.Write()

-def write_analysis_output(results, outfile, args, update_name=True):
+def write_analysis_output(results, outfile, args):
     analysis_debug_output(results)
-    results.update({"meta_info" : narf.ioutils.make_meta_info_dict(args=args, wd=common.base_dir)})

     to_append = []
     if args.theoryCorr and not args.theoryCorrAltOnly:
@@ -63,7 +62,7 @@ def write_analysis_output(results, outfile, args, update_name=True):
     if hasattr(args, "ptqVgen") and args.ptqVgen:
         to_append.append("vars_qtbyQ")

-    if to_append and update_name:
+    if to_append and not args.forceDefaultName:
         outfile = outfile.replace(".hdf5", f"_{'_'.join(to_append)}.hdf5")

     if args.postfix:
@@ -75,9 +74,27 @@ def write_analysis_output(results, outfile, args, update_name=True):
         os.makedirs(args.outfolder)
     outfile = os.path.join(args.outfolder, outfile)

+    if args.appendOutputFile:
+        outfile = args.appendOutputFile
+        if os.path.isfile(outfile):
+            logger.info(f"Analysis output will be appended to file {outfile}")
+            open_as="a"
+        else:
+            logger.warning(f"Analysis output requested to be appended to file {outfile}, but the file does not exist yet, it will be created instead")
+            open_as="w"
+    else:
+        if os.path.isfile(outfile):
+            logger.warning(f"Output file {outfile} exists already, it will be overwritten")
+        open_as="w"
+
     time0 = time.time()
-    with h5py.File(outfile, 'w') as f:
-        narf.ioutils.pickle_dump_h5py("results", results, f)
+    with h5py.File(outfile, open_as) as f:
+        for k, v in results.items():
+            logger.debug(f"Pickle and dump {k}")
+            narf.ioutils.pickle_dump_h5py(k, v, f)
+
+        if "meta_info" not in f.keys():
+            narf.ioutils.pickle_dump_h5py("meta_info", narf.ioutils.make_meta_info_dict(args=args, wd=common.base_dir), f)

     logger.info(f"Writing output: {time.time()-time0}")
     logger.info(f"Output saved in {outfile}")
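The rewritten writing loop stores each top-level result under its own HDF5 key and writes meta_info only when it is not already present, which is what makes the second and later runs pure appends. A minimal sketch of the resulting behavior, using the same h5py and narf.ioutils calls as the committed code, but with illustrative payloads and a hypothetical file name (the real code builds meta_info via narf.ioutils.make_meta_info_dict):

    # Hedged sketch of the append semantics across two runs.
    import h5py
    import narf.ioutils

    results_run1 = {"WplusmunuPostVFP": {"weight_sum": 1.0}}   # illustrative payloads
    results_run2 = {"ZmumuPostVFP": {"weight_sum": 2.0}}

    with h5py.File("out.hdf5", "w") as f:                      # first run: fresh file
        for k, v in results_run1.items():
            narf.ioutils.pickle_dump_h5py(k, v, f)
        if "meta_info" not in f.keys():
            narf.ioutils.pickle_dump_h5py("meta_info", {"command": "run1"}, f)

    with h5py.File("out.hdf5", "a") as f:                      # second run: append
        for k, v in results_run2.items():
            narf.ioutils.pickle_dump_h5py(k, v, f)
        if "meta_info" not in f.keys():                        # already present -> skipped
            narf.ioutils.pickle_dump_h5py("meta_info", {"command": "run2"}, f)

    # out.hdf5 now holds WplusmunuPostVFP, ZmumuPostVFP, and the first run's meta_info.
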
3 changes: 2 additions & 1 deletion wremnants/datasets/datagroups.py
@@ -1,4 +1,5 @@
 from utilities import boostHistHelpers as hh,common,logging
+from utilities.io_tools import input_tools
 import lz4.frame
 import pickle
 import h5py
@@ -29,7 +30,7 @@ def __init__(self, infile, mode=None, **kwargs):
         elif infile.endswith(".hdf5"):
             logger.info("Load input file")
             self.h5file = h5py.File(infile, "r")
-            self.results = narf.ioutils.pickle_load_h5py(self.h5file["results"])
+            self.results = input_tools.load_results_h5py(self.h5file)
         else:
             raise ValueError(f"{infile} has unsupported file type")
3 changes: 2 additions & 1 deletion wremnants/helicity_utils.py
@@ -11,6 +11,7 @@
 from .theory_tools import moments_to_angular_coeffs
 from utilities import common, logging
 from utilities import boostHistHelpers as hh
+from utilities.io_tools import input_tools
 import numpy as np
 import h5py
 import hdf5plugin
@@ -31,7 +32,7 @@ def makehelicityWeightHelper(is_w_like = False, filename=None):
     if filename is None:
         filename = f"{common.data_dir}/angularCoefficients/w_z_moments_theoryAgnosticBinning.hdf5"
     with h5py.File(filename, "r") as ff:
-        out = narf.ioutils.pickle_load_h5py(ff["results"])
+        out = input_tools.load_results_h5py(ff)

     moments = out["Z"] if is_w_like else out["W"]
4 changes: 2 additions & 2 deletions wremnants/theory_corrections.py
@@ -261,7 +261,7 @@ def make_qcd_uncertainty_helper_by_helicity(is_w_like = False, filename=None):

     # load moments from file
     with h5py.File(filename, "r") as h5file:
-        results = narf.ioutils.pickle_load_h5py(h5file["results"])
+        results = input_tools.load_results_h5py(h5file)
         moments = results["Z"] if is_w_like else results["W"]

     moments_nom = moments[{"muRfact" : 1.j, "muFfact" : 1.j}].values()
@@ -316,7 +316,7 @@ def make_helicity_test_corrector(is_w_like = False, filename = None):

     # load moments from file
     with h5py.File(filename, "r") as h5file:
-        results = narf.ioutils.pickle_load_h5py(h5file["results"])
+        results = input_tools.load_results_h5py(h5file)
         moments = results["Z"] if is_w_like else results["W"]

     coeffs = theory_tools.moments_to_angular_coeffs(moments)
