Skip to content

Commit

Permalink
Validate cache properly
Browse files Browse the repository at this point in the history
Clarifies the `cached_response.is_cached = True` situation.
  • Loading branch information
Twixes committed Apr 19, 2024
1 parent 1917ed0 commit e457fd4
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 12 deletions.
19 changes: 12 additions & 7 deletions posthog/api/test/test_insight.py
Original file line number Diff line number Diff line change
Expand Up @@ -1181,6 +1181,7 @@ def test_insight_refreshing_query(self, spy_execute_hogql_query) -> None:
self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 0])
self.assertEqual(response["last_refresh"], "2012-01-15T04:01:34Z")
self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z")
self.assertFalse(response["is_cached"])

with freeze_time("2012-01-15T05:01:34.000Z"):
_create_event(team=self.team, event="$pageview", distinct_id="1")
Expand All @@ -1190,6 +1191,16 @@ def test_insight_refreshing_query(self, spy_execute_hogql_query) -> None:
self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1])
self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z")
self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change
self.assertFalse(response["is_cached"])

with freeze_time("2012-01-15T05:17:34.000Z"):
response = self.client.get(f"/api/projects/{self.team.id}/insights/{response['id']}/").json()
self.assertNotIn("code", response)
self.assertEqual(spy_execute_hogql_query.call_count, 2)
self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1])
self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z") # Using cached result
self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change
self.assertTrue(response["is_cached"])

with freeze_time("2012-01-16T05:01:34.000Z"):
# load it in the context of the dashboard, so has last 14 days as filter
Expand Down Expand Up @@ -1220,13 +1231,7 @@ def test_insight_refreshing_query(self, spy_execute_hogql_query) -> None:
)
self.assertEqual(response["last_refresh"], "2012-01-16T05:01:34Z")
self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change

with freeze_time("2012-01-25T05:01:34.000Z"):
response = self.client.get(f"/api/projects/{self.team.id}/insights/{response['id']}/").json()
self.assertNotIn("code", response)
self.assertEqual(spy_execute_hogql_query.call_count, 3)
self.assertEqual(response["last_refresh"], None)
self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change
self.assertFalse(response["is_cached"])

#  Test property filter

Expand Down
32 changes: 27 additions & 5 deletions posthog/hogql_queries/query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from django.core.cache import cache
from prometheus_client import Counter
from pydantic import BaseModel, ConfigDict
from sentry_sdk import capture_exception, push_scope

from posthog.clickhouse.query_tagging import tag_queries
from posthog.hogql import ast
Expand Down Expand Up @@ -63,7 +64,7 @@

class ExecutionMode(IntEnum):
CALCULATION_REQUESTED = 2
"""Always recalculate, except if very recent results are present in cache."""
"""Always recalculate."""
CALCULATION_ONLY_IF_STALE = 1
"""Use cache, unless the results are missing or stale."""
CACHE_ONLY = 0
Expand Down Expand Up @@ -327,20 +328,41 @@ def run(
tag_queries(cache_key=cache_key)

if execution_mode != ExecutionMode.CALCULATION_REQUESTED:
cached_response = get_safe_cache(cache_key)
if cached_response:
# Let's look in the cache first
cached_response: CachedQueryResponse | CacheMissResponse
cached_response_candidate = get_safe_cache(cache_key)
match cached_response_candidate:
case CachedQueryResponse():
cached_response = cached_response_candidate
cached_response_candidate.is_cached = True
case None:
cached_response = CacheMissResponse(cache_key=cache_key)
case _:
# Whatever's in cache is malformed, so let's treat is as non-existent
cached_response = CacheMissResponse(cache_key=cache_key)
with push_scope() as scope:
scope.set_tag("cache_key", cache_key)
capture_exception(
ValueError(f"Cached response is of unexpected type {type(cached_response)}, ignoring it")
)

if isinstance(cached_response, CachedQueryResponse):
if not self._is_stale(cached_response):
QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="hit").inc()
cached_response.is_cached = True
# We have a valid result that's fresh enough, let's return it
return cached_response
else:
QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="stale").inc()
# We have a stale result. If we aren't allowed to calculate, let's still return it
# – otherwise let's proceed to calculation
if execution_mode == ExecutionMode.CACHE_ONLY:
return cached_response
else:
QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="miss").inc()
# We have no cached result. If we aren't allowed to calculate, let's return the cache miss
# – otherwise let's proceed to calculation
if execution_mode == ExecutionMode.CACHE_ONLY:
return CacheMissResponse(cache_key=cache_key)
return cached_response

fresh_response_dict = cast(QueryResponse, self.calculate()).model_dump()
fresh_response_dict["is_cached"] = False
Expand Down

0 comments on commit e457fd4

Please sign in to comment.