From ac6a6fde2cb601c1bd65decd65a2362bc44ec295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thibaut=20Barr=C3=A8re?= Date: Tue, 24 Dec 2024 14:49:30 +0100 Subject: [PATCH] =?UTF-8?q?Hot-fix:=20shutdown=20du=20worker=20si=20les=20?= =?UTF-8?q?jobs=20ne=20sont=20plus=20trait=C3=A9s=20(#4405)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Shut down the worker app if jobs are not processed anymore * Update doc to better reflect current doc * mix format * Make sure to return a conn (avoid RuntimeError) * mix format --- .../transport_web/plugs/worker_healthcheck.ex | 32 +++++++++++++------ 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/apps/transport/lib/transport_web/plugs/worker_healthcheck.ex b/apps/transport/lib/transport_web/plugs/worker_healthcheck.ex index 269386d259..3066e88e5f 100644 --- a/apps/transport/lib/transport_web/plugs/worker_healthcheck.ex +++ b/apps/transport/lib/transport_web/plugs/worker_healthcheck.ex @@ -12,6 +12,7 @@ defmodule TransportWeb.Plugs.WorkerHealthcheck do if Oban attempted jobs recently. """ import Plug.Conn + require Logger @app_start_waiting_delay {20, :minute} @oban_max_delay_since_last_attempt {60, :minute} @@ -25,17 +26,28 @@ defmodule TransportWeb.Plugs.WorkerHealthcheck do store_last_attempted_at_delay_metric() status_code = if healthy_state?(), do: 200, else: 503 + conn = + conn + |> put_resp_content_type("text/plain") + |> send_resp(status_code, """ + UP (WORKER-ONLY) + App start time: #{app_start_datetime()} + App started recently?: #{app_started_recently?()} + Oban last attempt: #{oban_last_attempted_at()} + Oban attempted jobs recently?: #{oban_attempted_jobs_recently?()} + Healthy state?: #{healthy_state?()} + """) + |> halt() + + # NOTE: Clever Cloud monitoring will better pick stuff back up + # if the app is completely down. + if !healthy_state?() do + Logger.info("Hot-fix: shutting down!!!") + # "Asynchronously and carefully stops the Erlang runtime system." + System.stop() + end + conn - |> put_resp_content_type("text/plain") - |> send_resp(status_code, """ - UP (WORKER-ONLY) - App start time: #{app_start_datetime()} - App started recently?: #{app_started_recently?()} - Oban last attempt: #{oban_last_attempted_at()} - Oban attempted jobs recently?: #{oban_attempted_jobs_recently?()} - Healthy state?: #{healthy_state?()} - """) - |> halt() else conn end