Skip to content

Commit

Permalink
HPCC-31893 Fix conflict between multiple thor components on same node
Browse files (browse the repository at this point in the history)
Signed-off-by: Michael Gardner <[email protected]>
  • Loading branch information
Michael-Gardner committed May 21, 2024
1 parent c6c7ed5 commit 9a56920
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 17 deletions.
24 changes: 7 additions & 17 deletions initfiles/bin/init_thorslave.in
Original file line number Diff line number Diff line change
Expand Up @@ -44,24 +44,14 @@ slavename=thorslave_${hpcc_compname}
# stop_slaves: stop the processes named $slavename and clean up their pid
# files under ${PID_DIR}.
#
# NOTE(review): this listing appears to be a rendered diff whose +/- markers
# were lost, so two strategies are interleaved: a killall-by-name approach
# (SIGTERM, poll, SIGKILL, delete pid files) and a per-pid-file approach that
# delegates to kill_process. The `while` loop below and the `for` loop share a
# single `done`, so the text is not valid shell as shown. Confirm against the
# repository which lines are current before relying on it.
stop_slaves()
{
# Poll budget for the while loop (one decrement per pass); also handed to
# kill_process as its third argument.
local timer=15
# Exit status of the last `killall -0` probe; 0 means a matching process
# was still found.
local isAlive=0

log "Attempting to kill $slavename with SIGTERM"
# Signals every process with this name; output and errors are discarded.
killall -SIGTERM $slavename > /dev/null 2>&1
# Keep probing while a matching process is still found and budget remains.
while [[ $isAlive -eq 0 && $timer -gt 0 ]];do
# Signal 0 delivers nothing; it only reports whether a matching process exists.
killall -0 $slavename > /dev/null 2>&1
isAlive=$?
# Only pause between probes when something is still running.
[[ $isAlive -eq 0 ]] && sleep 0.5
((timer--))
# Names of the pid files found for this slave name, and the loop variable
# that walks them.
local _pidarray
local _pidname

# Collect field 9 of each `ls -l` line in ${PID_DIR} that matches
# ${slavename}_<number>.pid.
# NOTE(review): the pattern is passed to awk unquoted, so the shell may
# glob-expand it and strips the backslash before awk sees it - verify.
_pidarray=($( ls -l ${PID_DIR} | awk -v regpattern=${slavename}_[1-9][0-9]*\.pid '$0~regpattern {print $9;}' ))
# One kill_process call per pid file: pid-file path, process name, timer.
for _pidname in ${_pidarray[@]}; do
kill_process ${PID_DIR}/$_pidname $slavename $timer
done

# The last probe still found a process: escalate from SIGTERM to SIGKILL.
if [[ $isAlive -eq 0 ]]; then
log "Failed to kill slaves with SIGTERM. Sending SIGKILL"
killall -SIGKILL $slavename > /dev/null
fi

# need regex here to prevent removing other Thor instance pid files
# Deletes only regular files directly inside ${PID_DIR} whose full path
# ends in ${slavename}_<number>.pid; output and errors are discarded.
find ${PID_DIR} -maxdepth 1 -type f -regex ".*/${slavename}_[1-9][0-9]*\.pid" -delete > /dev/null 2>&1

}

start_slaves()
Expand Down
2 changes: 2 additions & 0 deletions initfiles/sbin/hpcc_setenv.in
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ function kill_process () {
SENTINEL=$4
[[ -e $SENTINEL ]] && rm -f $SENTINEL
fi
log "checking $PID"
if [[ -e $PID ]]; then
log "pidwait_fn $PID $PROCESS $TIMEOUT 1"
pidwait_fn $PID $PROCESS $TIMEOUT 1
local RC_PIDWAIT=$?
return $RC_PIDWAIT
Expand Down

0 comments on commit 9a56920

Please sign in to comment.