Skip to content

Commit

Permalink
test cases for retrying
Browse files Browse the repository at this point in the history
  • Loading branch information
kneeyo1 committed Nov 27, 2024
1 parent 6849bff commit 982dcc8
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 19 deletions.
50 changes: 32 additions & 18 deletions src/sentry/processing/backpressure/monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,30 +80,44 @@ def assert_all_services_defined(services: dict[str, Service]) -> None:

def check_service_health(services: Mapping[str, Service]) -> MutableMapping[str, UnhealthyReasons]:
unhealthy_services: MutableMapping[str, UnhealthyReasons] = {}

max_retries = 2
for name, service in services.items():
high_watermark = options.get(f"backpressure.high_watermarks.{name}")
reasons = []

logger.info("Checking service `%s` (configured high watermark: %s):", name, high_watermark)
try:
for memory in check_service_memory(service):
if memory.percentage >= high_watermark:
reasons.append(memory)
logger.info(
" name: %s, used: %s, available: %s, percentage: %s",
memory.name,
memory.used,
memory.available,
memory.percentage,
for attempt in range(max_retries):
try:
for memory in check_service_memory(service):
if memory.percentage >= high_watermark:
reasons.append(memory)
logger.info(
" name: %s, used: %s, available: %s, percentage: %s",
memory.name,
memory.used,
memory.available,
memory.percentage,
)
break
except (ConnectionError, TimeoutError) as e:
logger.warning(
"Attempt %d/%d: Service `%s` encountered a connection error: %s",
attempt + 1,
max_retries,
name,
e,
)
except (ConnectionError, TimeoutError):
load_service_definitions(refresh=True)
except Exception as e:
with sentry_sdk.isolation_scope() as scope:
scope.set_tag("service", name)
sentry_sdk.capture_exception(e)
unhealthy_services[name] = e
if attempt < max_retries - 1:
load_service_definitions(refresh=True)
else:
sentry_sdk.capture_exception(e)
unhealthy_services[name] = e
except Exception as e:
with sentry_sdk.isolation_scope() as scope:
scope.set_tag("service", name)
sentry_sdk.capture_exception(e)
unhealthy_services[name] = e
break
else:
unhealthy_services[name] = reasons

Expand Down
38 changes: 37 additions & 1 deletion tests/sentry/processing/backpressure/test_redis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from unittest.mock import patch

from django.test.utils import override_settings

from sentry.processing.backpressure.memory import iter_cluster_memory_usage
from sentry.processing.backpressure.memory import ServiceMemory, iter_cluster_memory_usage
from sentry.processing.backpressure.monitor import (
Redis,
check_service_health,
Expand Down Expand Up @@ -57,3 +59,37 @@ def test_redis_unhealthy_state():
redis_services = unhealthy_services.get("redis")
assert isinstance(redis_services, list)
assert len(redis_services) == 6


@use_redis_cluster()
@patch("sentry.processing.backpressure.monitor.iter_cluster_memory_usage")
@patch("sentry.processing.backpressure.monitor.load_service_definitions")
def test_redis_retry_fail(mock_load_service_definitions, mock_iter_cluster_memory_usage):
    """Redis is reported unhealthy with the error when every attempt fails to connect."""
    # A single exception as side_effect makes every memory query raise,
    # so no attempt of the health check can succeed.
    mock_iter_cluster_memory_usage.side_effect = ConnectionError("Connection failed")
    services = load_service_definitions()

    unhealthy_services = check_service_health(services=services)
    redis_services = unhealthy_services.get("redis")

    # The exception itself is recorded instead of a list of memory readings.
    assert isinstance(redis_services, ConnectionError)
    # Two memory queries were made: the initial attempt plus one retry.
    assert mock_iter_cluster_memory_usage.call_count == 2
    # Definitions are refreshed between attempts only, so exactly one
    # refresh is expected; `assert_called_with` would also accept a stray
    # extra refresh after the final failure.
    mock_load_service_definitions.assert_called_once_with(refresh=True)


@use_redis_cluster()
@patch("sentry.processing.backpressure.monitor.iter_cluster_memory_usage")
@patch("sentry.processing.backpressure.monitor.load_service_definitions")
def test_redis_retry_success(mock_load_service_definitions, mock_iter_cluster_memory_usage):
    """Redis ends up with no unhealthy reasons when the retry after a connection error works."""
    # First memory query raises; the second returns one reading at 50% usage.
    second_attempt_reading = ServiceMemory(
        name="testRedis", used=50, available=100, percentage=0.5
    )
    mock_iter_cluster_memory_usage.side_effect = [
        ConnectionError("Connection failed"),
        [second_attempt_reading],
    ]

    health_report = check_service_health(services=load_service_definitions())
    redis_reasons = health_report.get("redis")

    # An empty list (not an exception) is recorded for the service.
    assert isinstance(redis_reasons, list)
    assert not redis_reasons
    # One failed query plus one successful retry, with a single refresh in between.
    assert mock_iter_cluster_memory_usage.call_count == 2
    mock_load_service_definitions.assert_called_once_with(refresh=True)

0 comments on commit 982dcc8

Please sign in to comment.