From df5397f4b58a612d935f45c92b566f2dd2ac3dea Mon Sep 17 00:00:00 2001
From: Benedikt Volkel
Date: Fri, 22 Mar 2024 14:05:58 +0100
Subject: [PATCH] Tidy a bit and abort as early as possible

* check for GRID token at the very beginning
* check for jq tool at the very beginning
* set a default seed if nothing is given
* adjust tests
* review follow-up: echo_error writes to stderr, jq is looked up with
  "command -v" instead of "which", the double negative in the
  "cannot be sourced" message is fixed and the worker-count comment in
  the pp test matches NWORKERS=2
---
Note: the line structure of this patch was restored from a copy in which
all line breaks had been collapsed to spaces. Hunk line counts agree with
the hunk headers, but leading indentation and inner spacing of context and
removed lines are best-effort. If the context does not match, apply with
"git apply --ignore-whitespace" (or "git am --ignore-whitespace").

 MC/run/ANCHOR/anchorMC.sh                     | 104 ++++++++++--------
 .../tests/test_anchor_2023_apass2_PbPb.sh     |   9 +-
 .../tests/test_anchor_2023_apass2_pp.sh       |  13 ++-
 3 files changed, 73 insertions(+), 53 deletions(-)

diff --git a/MC/run/ANCHOR/anchorMC.sh b/MC/run/ANCHOR/anchorMC.sh
index 13512fea7..827706219 100755
--- a/MC/run/ANCHOR/anchorMC.sh
+++ b/MC/run/ANCHOR/anchorMC.sh
@@ -15,6 +15,16 @@
 # helper functionality #
 ########################
 
+echo_info()
+{
+  echo "INFO [anchorMC]: ${*}"
+}
+
+echo_error()
+{
+  echo "ERROR [anchorMC]: ${*}" >&2
+}
+
 print_help()
 {
   echo "Usage: ./anchorMC.sh"
@@ -53,7 +63,7 @@ print_help()
 # Prevent the script from being soured to omit unexpected surprises when exit is used
 SCRIPT_NAME="$(basename "$(test -L "$0" && readlink "$0" || echo "$0")")"
 if [ "${SCRIPT_NAME}" != "$(basename ${BASH_SOURCE[0]})" ] ; then
-  echo "ERROR: This script cannot not be sourced" >&2
+  echo_error "This script cannot be sourced" >&2
   return 1
 fi
 
@@ -70,8 +80,15 @@ while [ "$1" != "" ] ; do
 done
 
 # make sure O2DPG + O2 is loaded
-[ ! "${O2DPG_ROOT}" ] && echo "Error: This needs O2DPG loaded" && exit 1
-[ ! "${O2_ROOT}" ] && echo "Error: This needs O2 loaded" && exit 1
+[ ! "${O2DPG_ROOT}" ] && echo_error "This needs O2DPG loaded" && exit 1
+[ ! "${O2_ROOT}" ] && echo_error "This needs O2 loaded" && exit 1
+
+# check if jq is there
+command -v jq >/dev/null 2>&1
+[ "${?}" != "0" ] && { echo_error "jq is not found. Install or load via alienv." ; exit 1 ; }
+
+alien-token-info >/dev/null 2>&1
+[ "${?}" != "0" ] && { echo_error "No GRID token found, required to run." ; exit 1 ; }
 
 #################################################################
 # Set all required variables to identify an anchored production #
@@ -87,8 +104,8 @@ export ALIEN_JDL_WORKFLOWDETECTORS=${ALIEN_JDL_WORKFLOWDETECTORS:-ITS,TPC,TOF,FV
 export ALIEN_JDL_ANCHOR_SIM_OPTIONS=${ALIEN_JDL_ANCHOR_SIM_OPTIONS:--gen pythia8}
 # all others MUST be set by the user/on the outside
 export ALIEN_JDL_LPMANCHORPASSNAME=${ALIEN_JDL_LPMANCHORPASSNAME:-${ANCHORPASSNAME}}
-export ALIEN_JDL_MCANCHOR=${ALIEN_JDL_MCANCHOR:-${MCANCHOR}}
-export ALIEN_JDL_LPMPASSNAME=${ALIEN_JDL_LPMPASSNAME:-${PASSNAME}}
+# LPMPASSNAME is used in O2 and O2DPG scripts, however on the other hand, ALIEN_JDL_LPMANCHORPASSNAME is the one that is set in JDL templates; so use ALIEN_JDL_LPMANCHORPASSNAME and set ALIEN_JDL_LPMPASSNAME
+export ALIEN_JDL_LPMPASSNAME=${ALIEN_JDL_LPMANCHORPASSNAME}
 export ALIEN_JDL_LPMRUNNUMBER=${ALIEN_JDL_LPMRUNNUMBER:-${RUNNUMBER}}
 export ALIEN_JDL_LPMPRODUCTIONTYPE=${ALIEN_JDL_LPMPRODUCTIONTYPE:-${PRODUCTIONTYPE}}
 export ALIEN_JDL_LPMINTERACTIONTYPE=${ALIEN_JDL_LPMINTERACTIONTYPE:-${INTERACTIONTYPE}}
@@ -101,29 +118,29 @@ export ALIEN_JDL_ADDTIMESERIESINMC=${ALIEN_JDL_ADDTIMESERIESINMC:-1}
 
 # cache the production tag, will be set to a special anchor tag; reset later in fact
 ALIEN_JDL_LPMPRODUCTIONTAG_KEEP=$ALIEN_JDL_LPMPRODUCTIONTAG
-echo "Substituting ALIEN_JDL_LPMPRODUCTIONTAG=$ALIEN_JDL_LPMPRODUCTIONTAG with ALIEN_JDL_LPMANCHORPRODUCTION=$ALIEN_JDL_LPMANCHORPRODUCTION for simulating reco pass..."
+echo_info "Substituting ALIEN_JDL_LPMPRODUCTIONTAG=$ALIEN_JDL_LPMPRODUCTIONTAG with ALIEN_JDL_LPMANCHORPRODUCTION=$ALIEN_JDL_LPMANCHORPRODUCTION for simulating reco pass..."
 ALIEN_JDL_LPMPRODUCTIONTAG=$ALIEN_JDL_LPMANCHORPRODUCTION
 
 # check variables that need to be set
-[ -z "${ALIEN_JDL_LPMANCHORPASSNAME}" ] && { echo "ERROR: Set ALIEN_JDL_LPMANCHORPASSNAME or ANCHORPASSNAME" ; exit 1 ; }
-[ -z "${ALIEN_JDL_MCANCHOR}" ] && { echo "ERROR: Set ALIEN_JDL_MCANCHOR or MCANCHOR" ; exit 1 ; }
-[ -z "${ALIEN_JDL_LPMPASSNAME}" ] && { echo "ERROR: Set ALIEN_JDL_LPMPASSNAME or PASSNAME" ; exit 1 ; }
-[ -z "${ALIEN_JDL_LPMRUNNUMBER}" ] && { echo "ERROR: Set ALIEN_JDL_LPMRUNNUMBER or RUNNUMBER" ; exit 1 ; }
-[ -z "${ALIEN_JDL_LPMPRODUCTIONTYPE}" ] && { echo "ERROR: Set ALIEN_JDL_LPMPRODUCTIONTYPE or PRODUCTIONTYPE" ; exit 1 ; }
-[ -z "${ALIEN_JDL_LPMINTERACTIONTYPE}" ] && { echo "ERROR: Set ALIEN_JDL_LPMINTERACTIONTYPE or INTERACTIONTYPE" ; exit 1 ; }
-[ -z "${ALIEN_JDL_LPMPRODUCTIONTAG}" ] && { echo "ERROR: Set ALIEN_JDL_LPMPRODUCTIONTAG or PRODUCTIONTAG" ; exit 1 ; }
-[ -z "${ALIEN_JDL_LPMANCHORRUN}" ] && { echo "ERROR: Set ALIEN_JDL_LPMANCHORRUN or ANCHORRUN" ; exit 1 ; }
-[ -z "${ALIEN_JDL_LPMANCHORPRODUCTION}" ] && { echo "ERROR: Set ALIEN_JDL_LPMANCHORPRODUCTION or ANCHORPRODUCTION" ; exit 1 ; }
-[ -z "${ALIEN_JDL_LPMANCHORYEAR}" ] && { echo "ERROR: Set ALIEN_JDL_LPMANCHORYEAR or ANCHORYEAR" ; exit 1 ; }
-
-[ -z "${NTIMEFRAMES}" ] && { echo "ERROR: Set NTIMEFRAMES" ; exit 1 ; }
-[ -z "${NSIGEVENTS}" ] && { echo "ERROR: Set NSIGEVENTS" ; exit 1 ; }
-[ -z "${SPLITID}" ] && { echo "ERROR: Set SPLITID" ; exit 1 ; }
-[ -z "${CYCLE}" ] && { echo "ERROR: Set CYCLE" ; exit 1 ; }
-[ -z "${PRODSPLIT}" ] && { echo "ERROR: Set PRODSPLIT" ; exit 1 ; }
+[ -z "${ALIEN_JDL_LPMANCHORPASSNAME}" ] && { echo_error "Set ALIEN_JDL_LPMANCHORPASSNAME or ANCHORPASSNAME" ; exit 1 ; }
+[ -z "${ALIEN_JDL_LPMRUNNUMBER}" ] && { echo_error "Set ALIEN_JDL_LPMRUNNUMBER or RUNNUMBER" ; exit 1 ; }
+[ -z "${ALIEN_JDL_LPMPRODUCTIONTYPE}" ] && { echo_error "Set ALIEN_JDL_LPMPRODUCTIONTYPE or PRODUCTIONTYPE" ; exit 1 ; }
+[ -z "${ALIEN_JDL_LPMINTERACTIONTYPE}" ] && { echo_error "Set ALIEN_JDL_LPMINTERACTIONTYPE or INTERACTIONTYPE" ; exit 1 ; }
+[ -z "${ALIEN_JDL_LPMPRODUCTIONTAG}" ] && { echo_error "Set ALIEN_JDL_LPMPRODUCTIONTAG or PRODUCTIONTAG" ; exit 1 ; }
+[ -z "${ALIEN_JDL_LPMANCHORRUN}" ] && { echo_error "Set ALIEN_JDL_LPMANCHORRUN or ANCHORRUN" ; exit 1 ; }
+[ -z "${ALIEN_JDL_LPMANCHORPRODUCTION}" ] && { echo_error "Set ALIEN_JDL_LPMANCHORPRODUCTION or ANCHORPRODUCTION" ; exit 1 ; }
+[ -z "${ALIEN_JDL_LPMANCHORYEAR}" ] && { echo_error "Set ALIEN_JDL_LPMANCHORYEAR or ANCHORYEAR" ; exit 1 ; }
+
+[ -z "${NTIMEFRAMES}" ] && { echo_error "Set NTIMEFRAMES" ; exit 1 ; }
+[ -z "${NSIGEVENTS}" ] && { echo_error "Set NSIGEVENTS" ; exit 1 ; }
+[ -z "${SPLITID}" ] && { echo_error "Set SPLITID" ; exit 1 ; }
+[ -z "${CYCLE}" ] && { echo_error "Set CYCLE" ; exit 1 ; }
+[ -z "${PRODSPLIT}" ] && { echo_error "Set PRODSPLIT" ; exit 1 ; }
 
 # also for this keep a real default
 NWORKERS=${NWORKERS:-8}
+# set a default seed if not given
+SEED=${ALIEN_PROC_ID:-${SEED:-1}}
 
 
 # default async_pass.sh script
@@ -143,14 +160,14 @@ fi
 # if there is no setenv_extra.sh in this directory (so no special version is "shipped" with this rpodcution), copy the default one
 if [[ ! -f setenv_extra.sh ]] ; then
   cp ${DPGSETENV} .
-  echo "[INFO alien_setenv_extra.sh] Use default setenv_extra.sh from ${DPGSETENV}."
+  echo_info "Use default setenv_extra.sh from ${DPGSETENV}."
 else
-  echo "[INFO alien_setenv_extra.sh] setenv_extra.sh was found in the current working directory, use it."
+  echo_info "setenv_extra.sh was found in the current working directory, use it."
 fi
 
 chmod u+x setenv_extra.sh
 
-echo "[INFO alien_async_pass.sh] Setting up DPGRECO to ${DPGRECO}"
+echo_info "Setting up DPGRECO to ${DPGRECO}"
 
 # settings that are MC-specific, modify setenv_extra.sh in-place
 sed -i 's/GPU_global.dEdxUseFullGainMap=1;GPU_global.dEdxDisableResidualGainMap=1/GPU_global.dEdxSplineTopologyCorrFile=splines_for_dedx_V1_MC_iter0_PP.root;GPU_global.dEdxDisableTopologyPol=1;GPU_global.dEdxDisableGainMap=1;GPU_global.dEdxDisableResidualGainMap=1;GPU_global.dEdxDisableResidualGain=1/' setenv_extra.sh
@@ -167,14 +184,14 @@ touch list.list
 ./async_pass.sh ${CTF_TEST_FILE:-""} 2&> async_pass_log.log
 RECO_RC=$?
 
-echo "RECO finished with ${RECO_RC}"
+echo_info "async_pass.sh finished with ${RECO_RC}"
 
 if [[ "${RECO_RC}" != "0" ]] ; then
   exit ${RECO_RC}
 fi
 
 ALIEN_JDL_LPMPRODUCTIONTAG=$ALIEN_JDL_LPMPRODUCTIONTAG_KEEP
-echo "Setting back ALIEN_JDL_LPMPRODUCTIONTAG to $ALIEN_JDL_LPMPRODUCTIONTAG"
+echo_info "Setting back ALIEN_JDL_LPMPRODUCTIONTAG to $ALIEN_JDL_LPMPRODUCTIONTAG"
 
 # now create the local MC config file --> config-config.json
 ${O2DPG_ROOT}/UTILS/parse-async-WorkflowConfig.py
@@ -182,15 +199,13 @@ ASYNC_WF_RC=${?}
 
 # check if config reasonably created
 if [[ "${ASYNC_WF_RC}" != "0" || `grep "o2-ctf-reader-workflow-options" config-json.json 2> /dev/null | wc -l` == "0" ]]; then
-  echo "Problem in anchor config creation. Exiting."
+  echo_error "Problem in anchor config creation. Exiting."
   exit 1
 fi
 
 # -- CREATE THE MC JOB DESCRIPTION ANCHORED TO RUN --
 
 MODULES="--skipModules ZDC"
-# introduce variable to make usage clear
-SEED=${ALIEN_PROC_ID}
 # Since this is used, set it explicitly
 ALICEO2_CCDB_LOCALCACHE=${ALICEO2_CCDB_LOCALCACHE:-$(pwd)/ccdb}
 
@@ -205,28 +220,31 @@ remainingargs="${remainingargs} -productionTag ${ALIEN_JDL_LPMPRODUCTIONTAG:-ali
 # since the last passed argument wins, e.g. -productionTag cannot be overwritten by the user
 remainingargs="${ALIEN_JDL_ANCHOR_SIM_OPTIONS} ${remainingargs} --anchor-config config-json.json"
 
-echo "baseargs: ${baseargs}"
-echo "remainingargs: ${remainingargs}"
+echo_info "baseargs passed to o2dpg_sim_workflow_anchored.py: ${baseargs}"
+echo_info "remainingargs forwarded to o2dpg_sim_workflow.py: ${remainingargs}"
 
 # query CCDB has changed, w/o "_"
 ${O2DPG_ROOT}/MC/bin/o2dpg_sim_workflow_anchored.py ${baseargs} -- ${remainingargs} &> timestampsampling_${ALIEN_JDL_LPMRUNNUMBER}.log
-if [ "$?" != "0" ] ; then
-  echo "Problem during anchor timestamp sampling and workflow creation. Exiting."
-  exit 1
+WF_RC="${?}"
+if [ "${WF_RC}" != "0" ] ; then
+  echo_error "Problem during anchor timestamp sampling and workflow creation. Exiting."
+  exit ${WF_RC}
 fi
 
 TIMESTAMP=`grep "Determined timestamp to be" timestampsampling_${ALIEN_JDL_LPMRUNNUMBER}.log | awk '//{print $6}'`
-echo "TIMESTAMP IS ${TIMESTAMP}"
+echo_info "TIMESTAMP IS ${TIMESTAMP}"
 
 # -- Create aligned geometry using ITS ideal alignment to avoid overlaps in geant
 CCDBOBJECTS_IDEAL_MC="ITS/Calib/Align"
 TIMESTAMP_IDEAL_MC=1
 ${O2_ROOT}/bin/o2-ccdb-downloadccdbfile --host http://alice-ccdb.cern.ch/ -p ${CCDBOBJECTS_IDEAL_MC} -d ${ALICEO2_CCDB_LOCALCACHE} --timestamp ${TIMESTAMP_IDEAL_MC}
-if [ ! "$?" == "0" ]; then
-  echo "Problem during CCDB prefetching of ${CCDBOBJECTS_IDEAL_MC}. Exiting."
-  exit 1
+CCDB_RC="${?}"
+if [ ! "${CCDB_RC}" == "0" ]; then
+  echo_error "Problem during CCDB prefetching of ${CCDBOBJECTS_IDEAL_MC}. Exiting."
+  exit ${CCDB_RC}
 fi
 
+# TODO This can potentially be removed or if needed, should be taken over by o2dpg_sim_workflow_anchored.py and O2_dpg_workflow_runner.py
 echo "run with echo in pipe" | ${O2_ROOT}/bin/o2-create-aligned-geometry-workflow --configKeyValues "HBFUtils.startTime=${TIMESTAMP}" --condition-remap=file://${ALICEO2_CCDB_LOCALCACHE}=ITS/Calib/Align -b
 mkdir -p $ALICEO2_CCDB_LOCALCACHE/GLO/Config/GeometryAligned
 ln -s -f $PWD/o2sim_geometry-aligned.root $ALICEO2_CCDB_LOCALCACHE/GLO/Config/GeometryAligned/snapshot.root
@@ -235,22 +253,22 @@ ln -s -f $PWD/o2sim_geometry-aligned.root $ALICEO2_CCDB_LOCALCACHE/GLO/Config/Ge
 
 export FAIRMQ_IPC_PREFIX=./
 
-echo "Ready to start main workflow"
+echo_info "Ready to start main workflow"
 
 ${O2DPG_ROOT}/MC/bin/o2_dpg_workflow_runner.py -f workflow.json -tt ${ALIEN_JDL_O2DPGWORKFLOWTARGET:-aod} --cpu-limit ${ALIEN_JDL_CPULIMIT:-8}
 MCRC=$? # <--- we'll report back this code
 
 if [[ "${ALIEN_JDL_ADDTIMESERIESINMC}" != "0" ]]; then
   # Default value is 1 so this is run by default.
-  echo "Running TPC time series"
+  echo_info "Running TPC time series"
   ${O2DPG_ROOT}/MC/bin/o2_dpg_workflow_runner.py -f workflow.json -tt tpctimes
 fi
 
-[[ ! -z "${DISABLE_QC}" ]] && echo "INFO: QC is disabled, skip it."
+[[ ! -z "${DISABLE_QC}" ]] && echo_info "QC is disabled, skip it."
 
 if [[ -z "${DISABLE_QC}" && "${MCRC}" = "0" && "${remainingargs}" == *"--include-local-qc"* ]] ; then
   # do QC tasks
-  echo "Doing QC"
+  echo_info "Doing QC"
   ${O2DPG_ROOT}/MC/bin/o2_dpg_workflow_runner.py -f workflow.json --target-labels QC --cpu-limit ${ALIEN_JDL_CPULIMIT:-8} -k
   # NOTE that with the -k|--keep-going option, the runner will try to keep on executing even if some tasks fail.
   # That means, even if there is a failing QC task, the return code will be 0
diff --git a/MC/run/ANCHOR/tests/test_anchor_2023_apass2_PbPb.sh b/MC/run/ANCHOR/tests/test_anchor_2023_apass2_PbPb.sh
index 095908e4b..0b3cc549e 100755
--- a/MC/run/ANCHOR/tests/test_anchor_2023_apass2_PbPb.sh
+++ b/MC/run/ANCHOR/tests/test_anchor_2023_apass2_PbPb.sh
@@ -8,9 +8,7 @@
 # taken from https://its.cern.ch/jira/browse/O2-4586
 export ALIEN_JDL_LPMANCHORPASSNAME=apass2
 export ALIEN_JDL_MCANCHOR=apass2
-export ALIEN_JDL_COLLISIONSYSTEM=Pb-Pb
 export ALIEN_JDL_CPULIMIT=8
-export ALIEN_JDL_LPMPASSNAME=apass2
 export ALIEN_JDL_LPMRUNNUMBER=544121
 export ALIEN_JDL_LPMPRODUCTIONTYPE=MC
 export ALIEN_JDL_LPMINTERACTIONTYPE=PbPb
@@ -25,12 +23,15 @@ export SPLITID=100
 export PRODSPLIT=153
 export CYCLE=0
 
-# on the GRID, this is set, for our use case, we can mimic any job ID
-export ALIEN_PROC_ID=2963436952
+# on the GRID, this is set and used as seed; when set, it takes precedence over SEED
+#export ALIEN_PROC_ID=2963436952
+export SEED=5
 
 # run the central anchor steering script; this includes
 # * derive timestamp
 # * derive interaction rate
 # * extract and prepare configurations (which detectors are contained in the run etc.)
 # * run the simulation (and QC)
+# To disable QC, uncomment the following line
+#export DISABLE_QC=1
 ${O2DPG_ROOT}/MC/run/ANCHOR/anchorMC.sh
diff --git a/MC/run/ANCHOR/tests/test_anchor_2023_apass2_pp.sh b/MC/run/ANCHOR/tests/test_anchor_2023_apass2_pp.sh
index e528b8a0a..aea70f3fe 100755
--- a/MC/run/ANCHOR/tests/test_anchor_2023_apass2_pp.sh
+++ b/MC/run/ANCHOR/tests/test_anchor_2023_apass2_pp.sh
@@ -8,9 +8,7 @@
 # taken from https://its.cern.ch/jira/browse/O2-4586
 export ALIEN_JDL_LPMANCHORPASSNAME=apass2
 export ALIEN_JDL_MCANCHOR=apass2
-export ALIEN_JDL_COLLISIONSYSTEM=p-p
 export ALIEN_JDL_CPULIMIT=8
-export ALIEN_JDL_LPMPASSNAME=apass2
 export ALIEN_JDL_LPMRUNNUMBER=535069
 export ALIEN_JDL_LPMPRODUCTIONTYPE=MC
 export ALIEN_JDL_LPMINTERACTIONTYPE=pp
@@ -25,15 +23,18 @@ export SPLITID=100
 export PRODSPLIT=153
 export CYCLE=0
 
-# on the GRID, this is set, for our use case, we can mimic any job ID
-export ALIEN_PROC_ID=2963436952
+# on the GRID, this is set and used as seed; when set, it takes precedence over SEED
+#export ALIEN_PROC_ID=2963436952
+export SEED=5
 
-# for pp and 50 events per TF, we launch only 4 workers.
-export NWORKERS=4
+# for pp and 50 events per TF, we launch only 2 workers.
+export NWORKERS=2
 
 # run the central anchor steering script; this includes
 # * derive timestamp
 # * derive interaction rate
 # * extract and prepare configurations (which detectors are contained in the run etc.)
 # * run the simulation (and QC)
+# To disable QC, uncomment the following line
+#export DISABLE_QC=1
 ${O2DPG_ROOT}/MC/run/ANCHOR/anchorMC.sh