Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ability to treat holes in run + Smaller fixes #1854

Merged
merged 2 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion MC/bin/o2dpg_sim_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1604,7 +1604,7 @@ def addQCPerTF(taskName, needs, readerCommand, configFilePath, objectsFile=''):
POOL_merge_task = createTask(name='poolmerge', needs=wfneeds, lab=["POOL"], mem='2000', cpu='1')
POOL_merge_task['cmd'] = '${O2DPG_ROOT}/UTILS/root_merger.py -o evtpool.root -i ' + ','.join(tfpool)
# also create the stat file with the event count
POOL_merge_task['cmd'] += '; RC=$?; root -l -q -b -e "auto f=TFile::Open(\\\"evtpool.root\\\"); auto t=(TTree*)f->Get(\\\"o2sim\\\"); int n=t->GetEntries(); std::ofstream((\\\"0_0_0_\\\"+std::to_string(n)+\\\".stat\\\").c_str()).close();" ; [[ ${RC} == 0 ]]'
POOL_merge_task['cmd'] += '; RC=$?; root -l -q -b -e "auto f=TFile::Open(\\\"evtpool.root\\\"); auto t=(TTree*)f->Get(\\\"o2sim\\\"); int n=t->GetEntries(); std::ofstream((\\\"0_0_0_\\\"+std::to_string(n)+\\\".stat\\\").c_str()) << \\\"# MonaLisa stat file for event pools\\\";" ; [[ ${RC} == 0 ]]'
workflow['stages'].append(POOL_merge_task)

# adjust for alternate (RECO) software environments
Expand Down
58 changes: 55 additions & 3 deletions MC/bin/o2dpg_sim_workflow_anchored.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import re
import json
import math
import pandas as pd

# Creates a time anchored MC workflow; positioned within a given run-number (as function of production size etc)

Expand Down Expand Up @@ -295,12 +296,52 @@ def determine_timestamp(sor, eor, splitinfo, cycle, ntf, HBF_per_timeframe = 256
production_offset = int(thisjobID * maxcycles) + cycle
# add the time difference of this slot to start-of-run to get the final timestamp
timestamp_of_production = sor + production_offset * ntf * HBF_per_timeframe * LHCOrbitMUS / 1000
# this is a closure test. If we had prefect floating point precision everywhere, it wouldn't fail.
# this is a closure test. If we had perfect floating point precision everywhere, it wouldn't fail.
# But since we don't have that and there are some int casts as well, better check again.
assert (timestamp_of_production >= sor)
assert (timestamp_of_production <= eor)
return int(timestamp_of_production), production_offset


def exclude_timestamp(ts, orbit, run, filename):
    """
    Check whether a job's timestamp (or orbit) falls within a bad data period.

    :param ts: timestamp to check, in milliseconds
    :param orbit: some orbit after the orbit-reset of the run
    :param run: run number used to select rows from the exclusion file
    :param filename: path to a CSV file with columns Run, From, To, Message;
                     an empty string disables the check
    :return: True if this timestamp/orbit should be excluded, False otherwise
    """
    if len(filename) == 0:
        return False

    if not os.path.isfile(filename):
        return False

    # read txt file into a pandas dataframe; on a parse failure we deliberately
    # fall back to "not excluded" (best-effort check) instead of crashing
    try:
        df = pd.read_csv(filename, header=None, names=["Run", "From", "To", "Message"])
    except Exception as e:
        print("Could not parse exclusion file " + filename + ": " + str(e))
        return False

    # extract data for this run number
    filtered = df[df['Run'] == run]

    # now extract the (from, to) exclusion intervals
    exclude_list = list(zip(filtered["From"].to_list(), filtered["To"].to_list()))

    if len(exclude_list) == 0:
        return False

    # heuristic: values smaller than this threshold (2018-01-01 in ms) cannot be
    # Run-3 unix timestamps in milliseconds, so they must be orbit numbers
    data_is_in_orbits = exclude_list[0][0] < 1514761200000

    # compare either the orbit or the timestamp against every interval
    value = orbit if data_is_in_orbits else ts
    return any(lower <= value <= upper for (lower, upper) in exclude_list)

def main():
parser = argparse.ArgumentParser(description='Creates an O2DPG simulation workflow, anchored to a given LHC run. The workflows are time anchored at regular positions within a run as a function of production size, split-id and cycle.')

Expand All @@ -312,6 +353,7 @@ def main():
parser.add_argument("-tf", type=int, help="number of timeframes per job", default=1)
parser.add_argument("--ccdb-IRate", type=bool, help="whether to try fetching IRate from CCDB/CTP", default=True)
parser.add_argument("--trig-eff", type=float, dest="trig_eff", help="Trigger eff needed for IR", default=-1.0)
parser.add_argument("--run-time-span-file", type=str, dest="run_span_file", help="Run-time-span-file for exclusions of timestamps (bad data periods etc.)", default="")
parser.add_argument('forward', nargs=argparse.REMAINDER) # forward args passed to actual workflow creation
args = parser.parse_args()
print (args)
Expand All @@ -329,6 +371,12 @@ def main():

# determine timestamp, and production offset for the final MC job to run
timestamp, prod_offset = determine_timestamp(run_start, run_end, [args.split_id - 1, args.prod_split], args.cycle, args.tf, GLOparams["OrbitsPerTF"])
# determine orbit corresponding to timestamp
orbit = GLOparams["FirstOrbit"] + (timestamp - GLOparams["SOR"]) / LHCOrbitMUS

# check if timestamp is to be excluded
# what to do in case of exclusion is decided below (workflow creation is skipped)
job_is_exluded = exclude_timestamp(timestamp, orbit, args.run_number, args.run_span_file)

# this is anchored to
print ("Determined start-of-run to be: ", run_start)
Expand Down Expand Up @@ -402,8 +450,12 @@ def main():
+ str(GLOparams["FirstOrbit"]) + " -field ccdb -bcPatternFile ccdb" + " --orbitsPerTF " + str(GLOparams["OrbitsPerTF"]) + " -col " + str(ColSystem) + " -eCM " + str(eCM) + ' --readoutDets ' + GLOparams['detList']
print ("forward args ", forwardargs)
cmd = "${O2DPG_ROOT}/MC/bin/o2dpg_sim_workflow.py " + forwardargs
print ("Creating time-anchored workflow...")
os.system(cmd)

if job_is_exluded:
print ("TIMESTAMP IS EXCLUDED IN RUN")
else:
print ("Creating time-anchored workflow...")
os.system(cmd)

if __name__ == "__main__":
sys.exit(main())
17 changes: 14 additions & 3 deletions MC/run/ANCHOR/anchorMC.sh
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,8 @@ MODULES="--skipModules ZDC"
ALICEO2_CCDB_LOCALCACHE=${ALICEO2_CCDB_LOCALCACHE:-$(pwd)/ccdb}

# these arguments will be digested by o2dpg_sim_workflow_anchored.py
baseargs="-tf ${NTIMEFRAMES} --split-id ${SPLITID} --prod-split ${PRODSPLIT} --cycle ${CYCLE} --run-number ${ALIEN_JDL_LPMRUNNUMBER}"
baseargs="-tf ${NTIMEFRAMES} --split-id ${SPLITID} --prod-split ${PRODSPLIT} --cycle ${CYCLE} --run-number ${ALIEN_JDL_LPMRUNNUMBER} \
${ALIEN_JDL_RUN_TIME_SPAN_FILE:+--run-time-span-file ${ALIEN_JDL_RUN_TIME_SPAN_FILE}}"

# these arguments will be passed as well but only evetually be digested by o2dpg_sim_workflow.py which is called from o2dpg_sim_workflow_anchored.py
remainingargs="-seed ${SEED} -ns ${NSIGEVENTS} --include-local-qc --pregenCollContext"
Expand All @@ -223,17 +224,27 @@ remainingargs="${ALIEN_JDL_ANCHOR_SIM_OPTIONS} ${remainingargs} --anchor-config
echo_info "baseargs passed to o2dpg_sim_workflow_anchored.py: ${baseargs}"
echo_info "remainingargs forwarded to o2dpg_sim_workflow.py: ${remainingargs}"

anchoringLogFile=timestampsampling_${ALIEN_JDL_LPMRUNNUMBER}.log
# query CCDB has changed, w/o "_"
${O2DPG_ROOT}/MC/bin/o2dpg_sim_workflow_anchored.py ${baseargs} -- ${remainingargs} &> timestampsampling_${ALIEN_JDL_LPMRUNNUMBER}.log
${O2DPG_ROOT}/MC/bin/o2dpg_sim_workflow_anchored.py ${baseargs} -- ${remainingargs} &> ${anchoringLogFile}
WF_RC="${?}"
if [ "${WF_RC}" != "0" ] ; then
echo_error "Problem during anchor timestamp sampling and workflow creation. Exiting."
exit ${WF_RC}
fi

TIMESTAMP=`grep "Determined timestamp to be" timestampsampling_${ALIEN_JDL_LPMRUNNUMBER}.log | awk '//{print $6}'`
TIMESTAMP=`grep "Determined timestamp to be" ${anchoringLogFile} | awk '//{print $6}'`
echo_info "TIMESTAMP IS ${TIMESTAMP}"

# check if this job is excluded because it falls inside a bad data-taking period
ISEXCLUDED=$(grep "TIMESTAMP IS EXCLUDED IN RUN" ${anchoringLogFile})
if [ "${ISEXCLUDED}" ]; then
# we can quit here; there is nothing to do
# (apart from maybe creating a fake empty AO2D.root file or the like)
echo "Timestamp is excluded from run. Nothing to do here"
exit 0
fi

# -- Create aligned geometry using ITS ideal alignment to avoid overlaps in geant
CCDBOBJECTS_IDEAL_MC="ITS/Calib/Align"
TIMESTAMP_IDEAL_MC=1
Expand Down
Loading