From bacf7480338fee7c12784632ea54b0ca79faf5e1 Mon Sep 17 00:00:00 2001 From: Benjamin Pelletier Date: Tue, 2 May 2023 11:15:15 -0700 Subject: [PATCH 1/2] Add timeout retry logic for query_and_describe --- monitoring/monitorlib/fetch/__init__.py | 40 ++++++++++++++++++++----- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/monitoring/monitorlib/fetch/__init__.py b/monitoring/monitorlib/fetch/__init__.py index bd945b4b09..2b470226d8 100644 --- a/monitoring/monitorlib/fetch/__init__.py +++ b/monitoring/monitorlib/fetch/__init__.py @@ -1,10 +1,10 @@ import datetime import json import traceback -from types import MappingProxyType from typing import Dict, Optional, List import flask +from loguru import logger import requests import urllib3 import yaml @@ -15,6 +15,9 @@ TIMEOUTS = (5, 25) # Timeouts of `connect` and `read` in seconds +ATTEMPTS = ( + 2 # Number of attempts to query when experiencing a retryable error like a timeout +) class RequestDescription(ImplicitDict): @@ -197,12 +200,35 @@ def query_and_describe( req_kwargs = kwargs.copy() if "timeout" not in req_kwargs: req_kwargs["timeout"] = TIMEOUTS - t0 = datetime.datetime.utcnow() - try: - return describe_query(client.request(verb, url, **req_kwargs), t0) - except (requests.RequestException, urllib3.exceptions.ReadTimeoutError) as e: - msg = "{}: {}".format(type(e).__name__, str(e)) - t1 = datetime.datetime.utcnow() + + # Note: retry logic could be attached to the `client` Session my `mount`ing an HTTPAdapter with custom + # `max_retries`, however we do not want to mutate the provided Session. Instead, retry only on errors we explicitly + # consider retryable. + for attempt in range(ATTEMPTS): + t0 = datetime.datetime.utcnow() + try: + return describe_query(client.request(verb, url, **req_kwargs), t0) + except (requests.Timeout, urllib3.exceptions.ReadTimeoutError) as e: + logger.warning( + "query_and_describe attempt {} to {} {} failed with timeout {}: {}", + attempt + 1, + verb, + url, + type(e).__name__, + str(e), + ) + except requests.RequestException as e: + logger.warning( + "query_and_describe attempt {} to {} {} failed with non-retryable RequestException {}: {}", + attempt + 1, + verb, + url, + type(e).__name__, + str(e), + ) + break + finally: + t1 = datetime.datetime.utcnow() # Reconstruct request similar to the one in the query (which is not # accessible at this point) From d35707ac988b4d022fa2eb6796e75c780432eac8 Mon Sep 17 00:00:00 2001 From: Benjamin Pelletier Date: Wed, 3 May 2023 07:45:20 -0700 Subject: [PATCH 2/2] Address comments Co-authored-by: Michael Barroco --- monitoring/monitorlib/fetch/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monitoring/monitorlib/fetch/__init__.py b/monitoring/monitorlib/fetch/__init__.py index 2b470226d8..8f57d67326 100644 --- a/monitoring/monitorlib/fetch/__init__.py +++ b/monitoring/monitorlib/fetch/__init__.py @@ -201,7 +201,7 @@ def query_and_describe( if "timeout" not in req_kwargs: req_kwargs["timeout"] = TIMEOUTS - # Note: retry logic could be attached to the `client` Session my `mount`ing an HTTPAdapter with custom + # Note: retry logic could be attached to the `client` Session by `mount`ing an HTTPAdapter with custom # `max_retries`, however we do not want to mutate the provided Session. Instead, retry only on errors we explicitly # consider retryable. for attempt in range(ATTEMPTS):