Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tidy a bit and abort as early as possible #1548

Merged
merged 1 commit into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 61 additions & 43 deletions MC/run/ANCHOR/anchorMC.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@
# helper functionality #
########################

# Print an informational message to stdout, tagged with the script name.
# Arguments: all arguments are joined into a single message line.
echo_info() {
  local msg="${*}"
  printf '%s\n' "INFO [anchorMC]: ${msg}"
}

# Print an error message to stdout, tagged with the script name.
# Callers that want the message on stderr redirect the call themselves.
# Arguments: all arguments are joined into a single message line.
echo_error() {
  local msg="${*}"
  printf '%s\n' "ERROR [anchorMC]: ${msg}"
}

print_help()
{
echo "Usage: ./anchorMC.sh"
Expand Down Expand Up @@ -53,7 +63,7 @@ print_help()
# Prevent the script from being sourced to avoid unexpected surprises when exit is used
SCRIPT_NAME="$(basename "$(test -L "$0" && readlink "$0" || echo "$0")")"
if [ "${SCRIPT_NAME}" != "$(basename ${BASH_SOURCE[0]})" ] ; then
echo "ERROR: This script cannot not be sourced" >&2
echo_error "This script cannot not be sourced" >&2
return 1
fi

Expand All @@ -70,8 +80,15 @@ while [ "$1" != "" ] ; do
done

# make sure O2DPG + O2 is loaded
[ ! "${O2DPG_ROOT}" ] && echo "Error: This needs O2DPG loaded" && exit 1
[ ! "${O2_ROOT}" ] && echo "Error: This needs O2 loaded" && exit 1
[ ! "${O2DPG_ROOT}" ] && echo_error "This needs O2DPG loaded" && exit 1
[ ! "${O2_ROOT}" ] && echo_error "This needs O2 loaded" && exit 1

# Abort as early as possible when a prerequisite is missing.

# check if jq is there; "command -v" is the shell builtin lookup and does not
# depend on an external "which" binary being installed
if ! command -v jq >/dev/null 2>&1 ; then
  echo_error "jq is not found. Install or load via alienv."
  exit 1
fi

# check for a GRID token; a non-zero exit status of alien-token-info is
# treated as "no token available"
if ! alien-token-info >/dev/null 2>&1 ; then
  echo_error "No GRID token found, required to run."
  exit 1
fi

#################################################################
# Set all required variables to identify an anchored production #
Expand All @@ -87,8 +104,8 @@ export ALIEN_JDL_WORKFLOWDETECTORS=${ALIEN_JDL_WORKFLOWDETECTORS:-ITS,TPC,TOF,FV
export ALIEN_JDL_ANCHOR_SIM_OPTIONS=${ALIEN_JDL_ANCHOR_SIM_OPTIONS:--gen pythia8}
# all others MUST be set by the user/on the outside
export ALIEN_JDL_LPMANCHORPASSNAME=${ALIEN_JDL_LPMANCHORPASSNAME:-${ANCHORPASSNAME}}
export ALIEN_JDL_MCANCHOR=${ALIEN_JDL_MCANCHOR:-${MCANCHOR}}
export ALIEN_JDL_LPMPASSNAME=${ALIEN_JDL_LPMPASSNAME:-${PASSNAME}}
# LPMPASSNAME is used in O2 and O2DPG scripts, whereas ALIEN_JDL_LPMANCHORPASSNAME is the one that is set in JDL templates; so take ALIEN_JDL_LPMANCHORPASSNAME as the input and derive ALIEN_JDL_LPMPASSNAME from it
export ALIEN_JDL_LPMPASSNAME=${ALIEN_JDL_LPMANCHORPASSNAME}
export ALIEN_JDL_LPMRUNNUMBER=${ALIEN_JDL_LPMRUNNUMBER:-${RUNNUMBER}}
export ALIEN_JDL_LPMPRODUCTIONTYPE=${ALIEN_JDL_LPMPRODUCTIONTYPE:-${PRODUCTIONTYPE}}
export ALIEN_JDL_LPMINTERACTIONTYPE=${ALIEN_JDL_LPMINTERACTIONTYPE:-${INTERACTIONTYPE}}
Expand All @@ -101,29 +118,29 @@ export ALIEN_JDL_ADDTIMESERIESINMC=${ALIEN_JDL_ADDTIMESERIESINMC:-1}

# cache the production tag, will be set to a special anchor tag; reset later in fact
ALIEN_JDL_LPMPRODUCTIONTAG_KEEP=$ALIEN_JDL_LPMPRODUCTIONTAG
echo "Substituting ALIEN_JDL_LPMPRODUCTIONTAG=$ALIEN_JDL_LPMPRODUCTIONTAG with ALIEN_JDL_LPMANCHORPRODUCTION=$ALIEN_JDL_LPMANCHORPRODUCTION for simulating reco pass..."
echo_info "Substituting ALIEN_JDL_LPMPRODUCTIONTAG=$ALIEN_JDL_LPMPRODUCTIONTAG with ALIEN_JDL_LPMANCHORPRODUCTION=$ALIEN_JDL_LPMANCHORPRODUCTION for simulating reco pass..."
ALIEN_JDL_LPMPRODUCTIONTAG=$ALIEN_JDL_LPMANCHORPRODUCTION

# check variables that need to be set
[ -z "${ALIEN_JDL_LPMANCHORPASSNAME}" ] && { echo "ERROR: Set ALIEN_JDL_LPMANCHORPASSNAME or ANCHORPASSNAME" ; exit 1 ; }
[ -z "${ALIEN_JDL_MCANCHOR}" ] && { echo "ERROR: Set ALIEN_JDL_MCANCHOR or MCANCHOR" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMPASSNAME}" ] && { echo "ERROR: Set ALIEN_JDL_LPMPASSNAME or PASSNAME" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMRUNNUMBER}" ] && { echo "ERROR: Set ALIEN_JDL_LPMRUNNUMBER or RUNNUMBER" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMPRODUCTIONTYPE}" ] && { echo "ERROR: Set ALIEN_JDL_LPMPRODUCTIONTYPE or PRODUCTIONTYPE" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMINTERACTIONTYPE}" ] && { echo "ERROR: Set ALIEN_JDL_LPMINTERACTIONTYPE or INTERACTIONTYPE" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMPRODUCTIONTAG}" ] && { echo "ERROR: Set ALIEN_JDL_LPMPRODUCTIONTAG or PRODUCTIONTAG" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMANCHORRUN}" ] && { echo "ERROR: Set ALIEN_JDL_LPMANCHORRUN or ANCHORRUN" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMANCHORPRODUCTION}" ] && { echo "ERROR: Set ALIEN_JDL_LPMANCHORPRODUCTION or ANCHORPRODUCTION" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMANCHORYEAR}" ] && { echo "ERROR: Set ALIEN_JDL_LPMANCHORYEAR or ANCHORYEAR" ; exit 1 ; }

[ -z "${NTIMEFRAMES}" ] && { echo "ERROR: Set NTIMEFRAMES" ; exit 1 ; }
[ -z "${NSIGEVENTS}" ] && { echo "ERROR: Set NSIGEVENTS" ; exit 1 ; }
[ -z "${SPLITID}" ] && { echo "ERROR: Set SPLITID" ; exit 1 ; }
[ -z "${CYCLE}" ] && { echo "ERROR: Set CYCLE" ; exit 1 ; }
[ -z "${PRODSPLIT}" ] && { echo "ERROR: Set PRODSPLIT" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMANCHORPASSNAME}" ] && { echo_error "Set ALIEN_JDL_LPMANCHORPASSNAME or ANCHORPASSNAME" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMRUNNUMBER}" ] && { echo_error "Set ALIEN_JDL_LPMRUNNUMBER or RUNNUMBER" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMPRODUCTIONTYPE}" ] && { echo_error "Set ALIEN_JDL_LPMPRODUCTIONTYPE or PRODUCTIONTYPE" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMINTERACTIONTYPE}" ] && { echo_error "Set ALIEN_JDL_LPMINTERACTIONTYPE or INTERACTIONTYPE" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMPRODUCTIONTAG}" ] && { echo_error "Set ALIEN_JDL_LPMPRODUCTIONTAG or PRODUCTIONTAG" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMANCHORRUN}" ] && { echo_error "Set ALIEN_JDL_LPMANCHORRUN or ANCHORRUN" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMANCHORPRODUCTION}" ] && { echo_error "Set ALIEN_JDL_LPMANCHORPRODUCTION or ANCHORPRODUCTION" ; exit 1 ; }
[ -z "${ALIEN_JDL_LPMANCHORYEAR}" ] && { echo_error "Set ALIEN_JDL_LPMANCHORYEAR or ANCHORYEAR" ; exit 1 ; }

[ -z "${NTIMEFRAMES}" ] && { echo_error "Set NTIMEFRAMES" ; exit 1 ; }
[ -z "${NSIGEVENTS}" ] && { echo_error "Set NSIGEVENTS" ; exit 1 ; }
[ -z "${SPLITID}" ] && { echo_error "Set SPLITID" ; exit 1 ; }
[ -z "${CYCLE}" ] && { echo_error "Set CYCLE" ; exit 1 ; }
[ -z "${PRODSPLIT}" ] && { echo_error "Set PRODSPLIT" ; exit 1 ; }

# also for this keep a real default
NWORKERS=${NWORKERS:-8}
# set a default seed if not given
SEED=${ALIEN_PROC_ID:-${SEED:-1}}


# default async_pass.sh script
Expand All @@ -143,14 +160,14 @@ fi
# if there is no setenv_extra.sh in this directory (so no special version is "shipped" with this production), copy the default one
if [[ ! -f setenv_extra.sh ]] ; then
cp ${DPGSETENV} .
echo "[INFO alien_setenv_extra.sh] Use default setenv_extra.sh from ${DPGSETENV}."
echo_info "Use default setenv_extra.sh from ${DPGSETENV}."
else
echo "[INFO alien_setenv_extra.sh] setenv_extra.sh was found in the current working directory, use it."
echo_info "setenv_extra.sh was found in the current working directory, use it."
fi

chmod u+x setenv_extra.sh

echo "[INFO alien_async_pass.sh] Setting up DPGRECO to ${DPGRECO}"
echo_info "Setting up DPGRECO to ${DPGRECO}"

# settings that are MC-specific, modify setenv_extra.sh in-place
sed -i 's/GPU_global.dEdxUseFullGainMap=1;GPU_global.dEdxDisableResidualGainMap=1/GPU_global.dEdxSplineTopologyCorrFile=splines_for_dedx_V1_MC_iter0_PP.root;GPU_global.dEdxDisableTopologyPol=1;GPU_global.dEdxDisableGainMap=1;GPU_global.dEdxDisableResidualGainMap=1;GPU_global.dEdxDisableResidualGain=1/' setenv_extra.sh
Expand All @@ -167,30 +184,28 @@ touch list.list
# Run the async reco pass, sending both stdout and stderr to the log file.
# "&>" takes no file-descriptor prefix: a leading "2" would not select stderr
# but would be handed to async_pass.sh as an additional positional argument.
./async_pass.sh ${CTF_TEST_FILE:-""} &> async_pass_log.log
# keep the exit status so it can be reported and propagated afterwards
RECO_RC=$?

echo "RECO finished with ${RECO_RC}"
echo_info "async_pass.sh finished with ${RECO_RC}"

if [[ "${RECO_RC}" != "0" ]] ; then
exit ${RECO_RC}
fi

ALIEN_JDL_LPMPRODUCTIONTAG=$ALIEN_JDL_LPMPRODUCTIONTAG_KEEP
echo "Setting back ALIEN_JDL_LPMPRODUCTIONTAG to $ALIEN_JDL_LPMPRODUCTIONTAG"
echo_info "Setting back ALIEN_JDL_LPMPRODUCTIONTAG to $ALIEN_JDL_LPMPRODUCTIONTAG"

# now create the local MC config file --> config-json.json
${O2DPG_ROOT}/UTILS/parse-async-WorkflowConfig.py
ASYNC_WF_RC=${?}

# check if config reasonably created
if [[ "${ASYNC_WF_RC}" != "0" || `grep "o2-ctf-reader-workflow-options" config-json.json 2> /dev/null | wc -l` == "0" ]]; then
echo "Problem in anchor config creation. Exiting."
echo_error "Problem in anchor config creation. Exiting."
exit 1
fi

# -- CREATE THE MC JOB DESCRIPTION ANCHORED TO RUN --

MODULES="--skipModules ZDC"
# introduce variable to make usage clear
SEED=${ALIEN_PROC_ID}
# Since this is used, set it explicitly
ALICEO2_CCDB_LOCALCACHE=${ALICEO2_CCDB_LOCALCACHE:-$(pwd)/ccdb}

Expand All @@ -205,28 +220,31 @@ remainingargs="${remainingargs} -productionTag ${ALIEN_JDL_LPMPRODUCTIONTAG:-ali
# since the last passed argument wins, e.g. -productionTag cannot be overwritten by the user
remainingargs="${ALIEN_JDL_ANCHOR_SIM_OPTIONS} ${remainingargs} --anchor-config config-json.json"

echo "baseargs: ${baseargs}"
echo "remainingargs: ${remainingargs}"
echo_info "baseargs passed to o2dpg_sim_workflow_anchored.py: ${baseargs}"
echo_info "remainingargs forwarded to o2dpg_sim_workflow.py: ${remainingargs}"

# query CCDB has changed, w/o "_"
${O2DPG_ROOT}/MC/bin/o2dpg_sim_workflow_anchored.py ${baseargs} -- ${remainingargs} &> timestampsampling_${ALIEN_JDL_LPMRUNNUMBER}.log
if [ "$?" != "0" ] ; then
echo "Problem during anchor timestamp sampling and workflow creation. Exiting."
exit 1
WF_RC="${?}"
if [ "${WF_RC}" != "0" ] ; then
echo_error "Problem during anchor timestamp sampling and workflow creation. Exiting."
exit ${WF_RC}
fi

TIMESTAMP=`grep "Determined timestamp to be" timestampsampling_${ALIEN_JDL_LPMRUNNUMBER}.log | awk '//{print $6}'`
echo "TIMESTAMP IS ${TIMESTAMP}"
echo_info "TIMESTAMP IS ${TIMESTAMP}"

# -- Create aligned geometry using ITS ideal alignment to avoid overlaps in geant
CCDBOBJECTS_IDEAL_MC="ITS/Calib/Align"
TIMESTAMP_IDEAL_MC=1
${O2_ROOT}/bin/o2-ccdb-downloadccdbfile --host http://alice-ccdb.cern.ch/ -p ${CCDBOBJECTS_IDEAL_MC} -d ${ALICEO2_CCDB_LOCALCACHE} --timestamp ${TIMESTAMP_IDEAL_MC}
if [ ! "$?" == "0" ]; then
echo "Problem during CCDB prefetching of ${CCDBOBJECTS_IDEAL_MC}. Exiting."
exit 1
CCDB_RC="${?}"
if [ ! "${CCDB_RC}" == "0" ]; then
echo_error "Problem during CCDB prefetching of ${CCDBOBJECTS_IDEAL_MC}. Exiting."
exit ${CCDB_RC}
fi

# TODO This can potentially be removed or if needed, should be taken over by o2dpg_sim_workflow_anchored.py and O2_dpg_workflow_runner.py
echo "run with echo in pipe" | ${O2_ROOT}/bin/o2-create-aligned-geometry-workflow --configKeyValues "HBFUtils.startTime=${TIMESTAMP}" --condition-remap=file://${ALICEO2_CCDB_LOCALCACHE}=ITS/Calib/Align -b
mkdir -p $ALICEO2_CCDB_LOCALCACHE/GLO/Config/GeometryAligned
ln -s -f $PWD/o2sim_geometry-aligned.root $ALICEO2_CCDB_LOCALCACHE/GLO/Config/GeometryAligned/snapshot.root
Expand All @@ -235,22 +253,22 @@ ln -s -f $PWD/o2sim_geometry-aligned.root $ALICEO2_CCDB_LOCALCACHE/GLO/Config/Ge

export FAIRMQ_IPC_PREFIX=./

echo "Ready to start main workflow"
echo_info "Ready to start main workflow"

${O2DPG_ROOT}/MC/bin/o2_dpg_workflow_runner.py -f workflow.json -tt ${ALIEN_JDL_O2DPGWORKFLOWTARGET:-aod} --cpu-limit ${ALIEN_JDL_CPULIMIT:-8}
MCRC=$? # <--- we'll report back this code

if [[ "${ALIEN_JDL_ADDTIMESERIESINMC}" != "0" ]]; then
# Default value is 1 so this is run by default.
echo "Running TPC time series"
echo_info "Running TPC time series"
${O2DPG_ROOT}/MC/bin/o2_dpg_workflow_runner.py -f workflow.json -tt tpctimes
fi

[[ ! -z "${DISABLE_QC}" ]] && echo "INFO: QC is disabled, skip it."
[[ ! -z "${DISABLE_QC}" ]] && echo_info "QC is disabled, skip it."

if [[ -z "${DISABLE_QC}" && "${MCRC}" = "0" && "${remainingargs}" == *"--include-local-qc"* ]] ; then
# do QC tasks
echo "Doing QC"
echo_info "Doing QC"
${O2DPG_ROOT}/MC/bin/o2_dpg_workflow_runner.py -f workflow.json --target-labels QC --cpu-limit ${ALIEN_JDL_CPULIMIT:-8} -k
# NOTE that with the -k|--keep-going option, the runner will try to keep on executing even if some tasks fail.
# That means, even if there is a failing QC task, the return code will be 0
Expand Down
9 changes: 5 additions & 4 deletions MC/run/ANCHOR/tests/test_anchor_2023_apass2_PbPb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
# taken from https://its.cern.ch/jira/browse/O2-4586
export ALIEN_JDL_LPMANCHORPASSNAME=apass2
export ALIEN_JDL_MCANCHOR=apass2
export ALIEN_JDL_COLLISIONSYSTEM=Pb-Pb
export ALIEN_JDL_CPULIMIT=8
export ALIEN_JDL_LPMPASSNAME=apass2
export ALIEN_JDL_LPMRUNNUMBER=544121
export ALIEN_JDL_LPMPRODUCTIONTYPE=MC
export ALIEN_JDL_LPMINTERACTIONTYPE=PbPb
Expand All @@ -25,12 +23,15 @@ export SPLITID=100
export PRODSPLIT=153
export CYCLE=0

# on the GRID, this is set, for our use case, we can mimic any job ID
export ALIEN_PROC_ID=2963436952
# on the GRID, this is set and used as seed; when set, it takes precedence over SEED
#export ALIEN_PROC_ID=2963436952
export SEED=5

# run the central anchor steering script; this includes
# * derive timestamp
# * derive interaction rate
# * extract and prepare configurations (which detectors are contained in the run etc.)
# * run the simulation (and QC)
# To disable QC, uncomment the following line
#export DISABLE_QC=1
${O2DPG_ROOT}/MC/run/ANCHOR/anchorMC.sh
11 changes: 6 additions & 5 deletions MC/run/ANCHOR/tests/test_anchor_2023_apass2_pp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
# taken from https://its.cern.ch/jira/browse/O2-4586
export ALIEN_JDL_LPMANCHORPASSNAME=apass2
export ALIEN_JDL_MCANCHOR=apass2
export ALIEN_JDL_COLLISIONSYSTEM=p-p
export ALIEN_JDL_CPULIMIT=8
export ALIEN_JDL_LPMPASSNAME=apass2
export ALIEN_JDL_LPMRUNNUMBER=535069
export ALIEN_JDL_LPMPRODUCTIONTYPE=MC
export ALIEN_JDL_LPMINTERACTIONTYPE=pp
Expand All @@ -25,15 +23,18 @@ export SPLITID=100
export PRODSPLIT=153
export CYCLE=0

# on the GRID, this is set, for our use case, we can mimic any job ID
export ALIEN_PROC_ID=2963436952
# on the GRID, this is set and used as seed; when set, it takes precedence over SEED
#export ALIEN_PROC_ID=2963436952
export SEED=5

# for pp and 50 events per TF, we launch only 4 workers.
export NWORKERS=4
export NWORKERS=2

# run the central anchor steering script; this includes
# * derive timestamp
# * derive interaction rate
# * extract and prepare configurations (which detectors are contained in the run etc.)
# * run the simulation (and QC)
# To disable QC, uncomment the following line
#export DISABLE_QC=1
${O2DPG_ROOT}/MC/run/ANCHOR/anchorMC.sh