Skip to content
This repository has been archived by the owner on Nov 9, 2017. It is now read-only.

init.d: better handle start and stop #89

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 57 additions & 11 deletions debian/td-agent.init
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ DESC=td-agent # Introduce a short description here
# PID file path, kept in a per-service directory under /var/run
PIDFILE=/var/run/$NAME/$NAME.pid
# Executable passed to start-stop-daemon via --exec (the bundled ruby interpreter)
DAEMON=/usr/lib/fluent/ruby/bin/ruby # Introduce the server's location here
# Arguments to run the daemon with
# NOTE(review): DAEMON_ARGS is assigned twice in this view. If both lines are
# really present, the second assignment re-expands the value set by the first.
# This looks like the removed/added pair of a diff -- confirm only one remains.
DAEMON_ARGS="/usr/sbin/td-agent $DAEMON_ARGS --daemon $PIDFILE --log /var/log/td-agent/td-agent.log"
# Path of the td-agent program; it is the first argument given to $DAEMON and
# is also used by get_pids to recognise running td-agent processes
TD_AGENT_CMD="/usr/sbin/td-agent"
DAEMON_ARGS="$TD_AGENT_CMD $DAEMON_ARGS --daemon $PIDFILE --log /var/log/td-agent/td-agent.log"
# Path of this init script
SCRIPTNAME=/etc/init.d/$NAME
# Extra options inserted into the start-stop-daemon --start invocations; empty here
START_STOP_DAEMON_ARGS=""

Expand Down Expand Up @@ -66,6 +67,33 @@ if [ -f "/usr/lib/fluent/jemalloc/lib/libjemalloc.so" ]; then
export LD_PRELOAD=/usr/lib/fluent/jemalloc/lib/libjemalloc.so
fi

# Usage: get_pids
# Prints the PIDs of all processes whose full command line begins with
# "$DAEMON $TD_AGENT_CMD" (one per line, as reported by pgrep).
# The exit status is pgrep's: 0 when at least one process matched,
# non-zero otherwise.
get_pids()
{
  # Anchor at the start of the command line so that unrelated processes
  # merely mentioning td-agent in their arguments are not matched
  local cmdline_pattern="^${DAEMON} ${TD_AGENT_CMD}"
  pgrep -f "${cmdline_pattern}"
}

# Usage: wait_pids TIMEOUT
#
# Waits for the processes reported by get_pids to disappear, checking once
# per second. Prints one "." for every second waited, followed by a newline.
#
# Arguments:
#   TIMEOUT - maximum number of seconds to wait. A missing or non-numeric
#             value is treated as 0 (check once, do not wait).
# Returns:
#   0 if get_pids does not return any PIDs anymore
#   2 if TIMEOUT is reached while get_pids still returns PIDs
wait_pids()
{
  # Keep the bookkeeping local so it cannot clobber the caller's variables
  local retval counter timeout
  retval=0
  counter=0
  timeout=${1:-0}
  # A non-numeric timeout would make the [ ] test below fail with an error
  # on every iteration, so the loop would never time out
  case "$timeout" in
    *[!0-9]*) timeout=0 ;;
  esac
  while get_pids > /dev/null; do
    # -ge rather than -gt: give up after exactly TIMEOUT one-second waits
    if [ "$counter" -ge "$timeout" ]; then
      retval=2
      break
    fi
    # printf instead of "echo -n", whose behaviour is not portable under /bin/sh
    printf '.'
    sleep 1
    counter=$((counter + 1))
  done
  echo
  return "$retval"
}

#
# Function that starts the daemon/service
#
Expand All @@ -82,6 +110,8 @@ do_start()
start-stop-daemon --start --quiet --pidfile $PIDFILE --exec $DAEMON \
${START_STOP_DAEMON_ARGS} --test > /dev/null \
|| return 1
get_pids > /dev/null \
&& return 2
start-stop-daemon --start --quiet --pidfile $PIDFILE --exec $DAEMON \
${START_STOP_DAEMON_ARGS} -- $DAEMON_ARGS \
|| return 2
Expand All @@ -100,18 +130,34 @@ do_stop()
# 1 if daemon was already stopped
# 2 if daemon could not be stopped
# other if a failure occurred
start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $PIDFILE --name ruby

wait_to_term=300
wait_to_kill=5

start-stop-daemon --stop --quiet --retry=TERM/$wait_to_term/KILL/$wait_to_kill --pidfile $PIDFILE --name ruby
RETVAL="$?"
[ "$RETVAL" = 2 ] && return 2
# Wait for children to finish too if this is a daemon that forks
# and if the daemon is only ever run from this initscript.
# If the above conditions are not satisfied then add some other code
# that waits for the process to drop all resources that could be
# needed by services started subsequently. A last resort is to
# sleep for some time.
start-stop-daemon --stop --quiet --oknodo --retry=0/30/KILL/5 --exec $DAEMON
[ "$?" = 2 ] && return 2
# Many daemons don't delete their pidfiles when they exit.

# Handle the case when either pid-file is missing or parent process
# is dead but children are still running (with different PID)
PIDs=$(get_pids)
if [ -n "$PIDs" ]; then
# Try to send TERM and wait
echo "Send TERM"
kill $PIDs
wait_pids $wait_to_term
RETVAL="$?"

# Try to send KILL and wait
if [ "$RETVAL" = 2 ]; then
echo "Send KILL"
kill -9 $PIDs
wait_pids $wait_to_kill
RETVAL="$?"
fi
fi

[ "$RETVAL" = 2 ] && return 2
rm -f $PIDFILE
return "$RETVAL"
}
Expand Down