From e2d0ae4db3e326d62d1112d3bc85920f3315c975 Mon Sep 17 00:00:00 2001 From: Johan Bloemberg Date: Fri, 18 Oct 2024 15:14:11 +0200 Subject: [PATCH] Improve restarting of workers to prevent issues with batch request --- docker/docker-compose.yml | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 2475f0b93..6cfc26f67 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -249,6 +249,8 @@ services: # time after which a SIGKILL is sent to celery after a SIGTERM (warm shutdown), default 10s # insufficient short grace period causes issues on batch when tasks are killed during the hourly worker restart stop_grace_period: 10m + # SIGTERM is the default, but make it explicit + stop_signal: SIGTERM depends_on: db-migrate: @@ -1127,15 +1129,23 @@ configs: content: | #!/bin/sh set -e - # find worker and restart the container(s) - docker restart $(docker ps --filter label=com.docker.compose.service=worker --quiet) + # stop and start workers one at a time to ensure (batch) tasks are still being picked up + # workers are sent a TERM signal, followed by a 10 minute grace period before KILL is sent + for worker in $(docker ps --filter label=com.docker.compose.service=worker --quiet); do + docker stop "$$worker" + docker start "$$worker" + done restart_nassl_worker_cron: content: | #!/bin/sh set -e - # find nassl worker and restart the container(s) - docker restart $(docker ps --filter label=com.docker.compose.service=worker-nassl --quiet) + # stop and start workers one at a time to ensure (batch) tasks are still being picked up + # workers are sent a TERM signal, followed by a 10 minute grace period before KILL is sent + for worker in $(docker ps --filter label=com.docker.compose.service=worker-nassl --quiet); do + docker stop "$$worker" + docker start "$$worker" + done docker_image_prune: content: |