From 9a569207ccf0aca58421ed982f90f9a114b7b897 Mon Sep 17 00:00:00 2001
From: Michael Gardner
Date: Tue, 21 May 2024 10:59:54 -0400
Subject: [PATCH] HPCC-31893 Fix conflict between multiple thor components on same node

Signed-off-by: Michael Gardner
---
Review note: stop_slaves now enumerates the slave pid files with a shell glob
plus an anchored regex instead of parsing `ls -l` output through awk, and
quotes every expansion handed to kill_process. The regex is what keeps pid
files of other Thor instances out of the loop. Hunk sizes are unchanged.
Indentation of context lines is assumed to be 4 spaces - confirm against the
target tree before applying.

 initfiles/bin/init_thorslave.in | 24 +++++++-----------------
 initfiles/sbin/hpcc_setenv.in   |  2 ++
 2 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/initfiles/bin/init_thorslave.in b/initfiles/bin/init_thorslave.in
index 1affe3ba9c3..a7cc38d2553 100755
--- a/initfiles/bin/init_thorslave.in
+++ b/initfiles/bin/init_thorslave.in
@@ -44,24 +44,14 @@ slavename=thorslave_${hpcc_compname}
 stop_slaves()
 {
     local timer=15
-    local isAlive=0
-
-    log "Attempting to kill $slavename with SIGTERM"
-    killall -SIGTERM $slavename > /dev/null 2>&1
-    while [[ $isAlive -eq 0 && $timer -gt 0 ]];do
-        killall -0 $slavename > /dev/null 2>&1
-        isAlive=$?
-        [[ $isAlive -eq 0 ]] && sleep 0.5
-        ((timer--))
+    local _pidfile
+
+    # Glob rather than parse ls; the anchored regex skips other Thor instances' pid files
+    for _pidfile in "${PID_DIR}/${slavename}"_[1-9]*.pid; do
+        [[ "${_pidfile##*/}" =~ ^"${slavename}"_[1-9][0-9]*\.pid$ ]] || continue
+        kill_process "$_pidfile" "$slavename" "$timer"
     done
-
-    if [[ $isAlive -eq 0 ]]; then
-        log "Failed to kill slaves with SIGTERM. Sending SIGKILL"
-        killall -SIGKILL $slavename > /dev/null
-    fi
-
-    # need regex here to prevent removing other Thor instance pid files
-    find ${PID_DIR} -maxdepth 1 -type f -regex ".*/${slavename}_[1-9][0-9]*\.pid" -delete > /dev/null 2>&1
+
 }
 
 start_slaves()
diff --git a/initfiles/sbin/hpcc_setenv.in b/initfiles/sbin/hpcc_setenv.in
index 60de6860025..af35d0471d5 100755
--- a/initfiles/sbin/hpcc_setenv.in
+++ b/initfiles/sbin/hpcc_setenv.in
@@ -30,7 +30,9 @@ function kill_process () {
         SENTINEL=$4
         [[ -e $SENTINEL ]] && rm -f $SENTINEL
     fi
+    log "checking $PID"
     if [[ -e $PID ]]; then
+        log "pidwait_fn $PID $PROCESS $TIMEOUT 1"
         pidwait_fn $PID $PROCESS $TIMEOUT 1
         local RC_PIDWAIT=$?
         return $RC_PIDWAIT