From e457fd4a839e9b6a9465e17a3bb777a811db88b2 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Fri, 19 Apr 2024 14:43:53 +0200 Subject: [PATCH] Validate cache properly Clarifies the `cached_response.is_cached = True` situation. --- posthog/api/test/test_insight.py | 19 ++++++++++------ posthog/hogql_queries/query_runner.py | 32 ++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/posthog/api/test/test_insight.py b/posthog/api/test/test_insight.py index 335a07fe17f1e..6d0d1f5f050ed 100644 --- a/posthog/api/test/test_insight.py +++ b/posthog/api/test/test_insight.py @@ -1181,6 +1181,7 @@ def test_insight_refreshing_query(self, spy_execute_hogql_query) -> None: self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 0]) self.assertEqual(response["last_refresh"], "2012-01-15T04:01:34Z") self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") + self.assertFalse(response["is_cached"]) with freeze_time("2012-01-15T05:01:34.000Z"): _create_event(team=self.team, event="$pageview", distinct_id="1") @@ -1190,6 +1191,16 @@ def test_insight_refreshing_query(self, spy_execute_hogql_query) -> None: self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1]) self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z") self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change + self.assertFalse(response["is_cached"]) + + with freeze_time("2012-01-15T05:17:34.000Z"): + response = self.client.get(f"/api/projects/{self.team.id}/insights/{response['id']}/").json() + self.assertNotIn("code", response) + self.assertEqual(spy_execute_hogql_query.call_count, 2) + self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1]) + self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z") # Using cached result + self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change + self.assertTrue(response["is_cached"]) with freeze_time("2012-01-16T05:01:34.000Z"): # load it in the context of the dashboard, so has last 14 days as filter @@ -1220,13 +1231,7 @@ def test_insight_refreshing_query(self, spy_execute_hogql_query) -> None: ) self.assertEqual(response["last_refresh"], "2012-01-16T05:01:34Z") self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change - - with freeze_time("2012-01-25T05:01:34.000Z"): - response = self.client.get(f"/api/projects/{self.team.id}/insights/{response['id']}/").json() - self.assertNotIn("code", response) - self.assertEqual(spy_execute_hogql_query.call_count, 3) - self.assertEqual(response["last_refresh"], None) - self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change + self.assertFalse(response["is_cached"]) #  Test property filter diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py index 0f54d6f03ea95..7d4b720ac4349 100644 --- a/posthog/hogql_queries/query_runner.py +++ b/posthog/hogql_queries/query_runner.py @@ -7,6 +7,7 @@ from django.core.cache import cache from prometheus_client import Counter from pydantic import BaseModel, ConfigDict +from sentry_sdk import capture_exception, push_scope from posthog.clickhouse.query_tagging import tag_queries from posthog.hogql import ast @@ -63,7 +64,7 @@ class ExecutionMode(IntEnum): CALCULATION_REQUESTED = 2 - """Always recalculate, except if very recent results are present in cache.""" + """Always recalculate.""" CALCULATION_ONLY_IF_STALE = 1 """Use cache, unless the results are missing or stale.""" CACHE_ONLY = 0 @@ -327,20 +328,41 @@ def run( tag_queries(cache_key=cache_key) if execution_mode != ExecutionMode.CALCULATION_REQUESTED: - cached_response = get_safe_cache(cache_key) - if cached_response: + # Let's look in the cache first + cached_response: CachedQueryResponse | CacheMissResponse + cached_response_candidate = get_safe_cache(cache_key) + match cached_response_candidate: + case CachedQueryResponse(): + cached_response = cached_response_candidate + cached_response_candidate.is_cached = True + case None: + cached_response = CacheMissResponse(cache_key=cache_key) + case _: + # Whatever's in cache is malformed, so let's treat is as non-existent + cached_response = CacheMissResponse(cache_key=cache_key) + with push_scope() as scope: + scope.set_tag("cache_key", cache_key) + capture_exception( + ValueError(f"Cached response is of unexpected type {type(cached_response)}, ignoring it") + ) + + if isinstance(cached_response, CachedQueryResponse): if not self._is_stale(cached_response): QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="hit").inc() - cached_response.is_cached = True + # We have a valid result that's fresh enough, let's return it return cached_response else: QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="stale").inc() + # We have a stale result. If we aren't allowed to calculate, let's still return it + # – otherwise let's proceed to calculation if execution_mode == ExecutionMode.CACHE_ONLY: return cached_response else: QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="miss").inc() + # We have no cached result. If we aren't allowed to calculate, let's return the cache miss + # – otherwise let's proceed to calculation if execution_mode == ExecutionMode.CACHE_ONLY: - return CacheMissResponse(cache_key=cache_key) + return cached_response fresh_response_dict = cast(QueryResponse, self.calculate()).model_dump() fresh_response_dict["is_cached"] = False