Skip to content

Commit

Permalink
feat(insights): invalidate obviously stale cache results (#17103)
Browse files Browse the repository at this point in the history
  • Loading branch information
thmsobrmlr authored Sep 11, 2023
1 parent 7f24c42 commit 3fb024b
Show file tree
Hide file tree
Showing 4 changed files with 279 additions and 14 deletions.
25 changes: 17 additions & 8 deletions posthog/datetime.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
from datetime import datetime, timedelta


def end_of_day(reference_date: datetime) -> datetime:
    """Return the last representable instant (23:59:59.999999) of ``reference_date``'s calendar day.

    Args:
        reference_date: Any moment within the day of interest; naive or timezone-aware.

    Returns:
        A new ``datetime`` on the same calendar day, carrying the same ``tzinfo`` as ``reference_date``.
    """
    midnight = datetime(
        year=reference_date.year, month=reference_date.month, day=reference_date.day, tzinfo=reference_date.tzinfo
    )
    # Jump to the following midnight and step back one microsecond, the smallest unit datetime can represent.
    return midnight + timedelta(days=1, microseconds=-1)
def start_of_hour(dt: datetime) -> datetime:
    """Truncate ``dt`` to the top of its hour, keeping its date, hour and ``tzinfo``."""
    # Minute, second and microsecond are omitted, so the constructor defaults them to zero.
    return datetime(dt.year, dt.month, dt.day, dt.hour, tzinfo=dt.tzinfo)


def start_of_day(reference_date: datetime) -> datetime:
    """Return midnight (00:00:00.000000) of ``reference_date``'s calendar day.

    Args:
        reference_date: Any moment within the day of interest; naive or timezone-aware.

    Returns:
        A new ``datetime`` with the same year, month, day and ``tzinfo`` as ``reference_date``
        and every smaller field set to zero.
    """
    return datetime(
        year=reference_date.year, month=reference_date.month, day=reference_date.day, tzinfo=reference_date.tzinfo
    )
def start_of_day(dt: datetime) -> datetime:
    """Return midnight (00:00:00.000000) of the calendar day ``dt`` falls on.

    Args:
        dt: Any moment within the day of interest; naive or timezone-aware.

    Returns:
        A new ``datetime`` with the same year, month, day and ``tzinfo`` as ``dt``
        and every smaller field set to zero.
    """
    return datetime(year=dt.year, month=dt.month, day=dt.day, tzinfo=dt.tzinfo)


def end_of_day(dt: datetime) -> datetime:
    """Return the last representable instant (23:59:59.999999) of the calendar day ``dt`` falls on.

    Args:
        dt: Any moment within the day of interest; naive or timezone-aware.

    Returns:
        A new ``datetime`` on the same calendar day, carrying the same ``tzinfo`` as ``dt``.
    """
    midnight = datetime(year=dt.year, month=dt.month, day=dt.day, tzinfo=dt.tzinfo)
    # Jump to the following midnight and step back one microsecond, the smallest unit datetime can represent.
    return midnight + timedelta(days=1, microseconds=-1)


def start_of_week(dt: datetime) -> datetime:
    """Return midnight of the Sunday that opens the week containing ``dt``, keeping ``dt``'s ``tzinfo``."""
    # weekday() numbers Monday as 0 and Sunday as 6; shifting by one modulo 7
    # yields the number of days elapsed since the most recent Sunday (0 on a Sunday).
    days_since_sunday = (dt.weekday() + 1) % 7
    midnight = datetime(dt.year, dt.month, dt.day, tzinfo=dt.tzinfo)
    return midnight - timedelta(days=days_since_sunday)


def start_of_month(dt: datetime) -> datetime:
    """Return midnight on the first day of ``dt``'s month, keeping ``dt``'s ``tzinfo``."""
    first_day = 1
    return datetime(dt.year, dt.month, first_day, tzinfo=dt.tzinfo)
65 changes: 62 additions & 3 deletions posthog/decorators.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
from datetime import datetime
from enum import Enum
from functools import wraps
from typing import Any, Callable, Dict, List, TypeVar, Union, cast
from zoneinfo import ZoneInfo

import posthoganalytics
from django.urls import resolve
from django.utils.timezone import now
from rest_framework.request import Request
from rest_framework.viewsets import GenericViewSet
from statshog.defaults.django import statsd

from posthog.clickhouse.query_tagging import tag_queries
from posthog.cloud_utils import is_cloud
from posthog.datetime import start_of_day, start_of_hour, start_of_month, start_of_week
from posthog.models import User
from posthog.models.filters.filter import Filter
from posthog.models.filters.path_filter import PathFilter
from posthog.models.filters.retention_filter import RetentionFilter
from posthog.models.filters.stickiness_filter import StickinessFilter
from posthog.models.filters.utils import get_filter
from posthog.models.team.team import Team
from posthog.utils import refresh_requested_by_client

from .utils import generate_cache_key, get_safe_cache
Expand Down Expand Up @@ -74,9 +84,12 @@ def wrapper(self, request) -> T:
route = "unknown"

if cached_result_package and cached_result_package.get("result"):
cached_result_package["is_cached"] = True
statsd.incr("posthog_cached_function_cache_hit", tags={"route": route})
return cached_result_package
if not is_stale(team, filter, cached_result_package):
cached_result_package["is_cached"] = True
statsd.incr("posthog_cached_function_cache_hit", tags={"route": route})
return cached_result_package
else:
statsd.incr("posthog_cached_function_cache_stale", tags={"route": route})
else:
statsd.incr("posthog_cached_function_cache_miss", tags={"route": route})

Expand All @@ -93,3 +106,49 @@ def wrapper(self, request) -> T:
return fresh_result_package

return wrapper


def stale_cache_invalidation_disabled(team: Team) -> bool:
    """Report whether stale-cache invalidation is switched off for ``team``.

    When ``is_cloud()`` is false this always returns ``False``. On PostHog Cloud the
    result is the negation of the ``stale-cache-invalidation-enabled`` feature flag,
    so the invalidation can be disabled temporarily to help in cases of service
    degradation.
    """
    if not is_cloud():
        return False

    # On PostHog Cloud the feature flag decides; it is evaluated locally and without emitting flag events.
    distinct_id = str(team.uuid)
    organization = team.organization
    organization_id = str(organization.id)
    invalidation_enabled = posthoganalytics.feature_enabled(
        "stale-cache-invalidation-enabled",
        distinct_id,
        groups={"organization": organization_id},
        group_properties={"organization": {"id": organization_id, "created_at": organization.created_at}},
        only_evaluate_locally=True,
        send_feature_flag_events=False,
    )
    return not invalidation_enabled


def is_stale(team: Team, filter: Filter | RetentionFilter | StickinessFilter | PathFilter, cached_result: Any) -> bool:
    """Decide whether a cached result is obviously outdated for the given filter.

    A result counts as stale when the interval bucket (hour, day, week or month)
    that ``date_to`` falls in starts later than the bucket of the cached
    ``last_refresh``. For example, an insight with a -7d date range and a daily
    interval that was last computed yesterday is stale; the same insight refreshed
    today is not. Any other interval value is never reported as stale, and neither
    is anything while ``stale_cache_invalidation_disabled(team)`` is true.

    Raises:
        Exception: If ``cached_result`` carries no ``last_refresh``.
    """
    if stale_cache_invalidation_disabled(team):
        return False

    last_refresh = cached_result.get("last_refresh", None)
    # The end of the range can't be later than now.
    date_to = min(filter.date_to, datetime.now(tz=ZoneInfo("UTC")))

    # Retention filters read the interval from `period`, lower-cased; other filters expose `interval` directly.
    if isinstance(filter, RetentionFilter):
        interval = filter.period.lower()
    else:
        interval = filter.interval

    if last_refresh is None:
        raise Exception("Cached results require a last_refresh")

    # NOTE(review): each side is truncated using its own tzinfo — confirm both are expected to share a timezone.
    truncations = (
        ("hour", start_of_hour),
        ("day", start_of_day),
        ("week", start_of_week),
        ("month", start_of_month),
    )
    for interval_name, truncate in truncations:
        if interval == interval_name:
            return truncate(date_to) > truncate(last_refresh)

    return False
33 changes: 33 additions & 0 deletions posthog/test/test_datetime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from datetime import datetime, timezone

from posthog.datetime import start_of_hour, start_of_day, end_of_day, start_of_week, start_of_month


def test_start_of_hour():
    """Minutes and seconds are dropped while the hour and UTC offset are kept."""
    moment = datetime.fromisoformat("2023-02-08T12:05:23+00:00")
    expected = datetime.fromisoformat("2023-02-08T12:00:00+00:00")

    assert start_of_hour(moment) == expected


def test_start_of_day():
    """The time of day is reset to midnight on the same date and UTC offset."""
    moment = datetime.fromisoformat("2023-02-08T12:05:23+00:00")
    expected = datetime.fromisoformat("2023-02-08T00:00:00+00:00")

    assert start_of_day(moment) == expected


def test_end_of_day():
    """The result is the final microsecond of the same UTC day."""
    moment = datetime.fromisoformat("2023-02-08T12:05:23+00:00")
    expected = datetime(2023, 2, 8, 23, 59, 59, 999999, tzinfo=timezone.utc)

    assert end_of_day(moment) == expected


def test_start_of_week():
    """2023-02-08 maps back to midnight on 2023-02-05."""
    moment = datetime.fromisoformat("2023-02-08T12:05:23+00:00")
    expected = datetime.fromisoformat("2023-02-05T00:00:00+00:00")

    assert start_of_week(moment) == expected


def test_start_of_month():
    """Any moment in February 2023 maps back to midnight on 2023-02-01."""
    moment = datetime.fromisoformat("2023-02-08T12:05:23+00:00")
    expected = datetime.fromisoformat("2023-02-01T00:00:00+00:00")

    assert start_of_month(moment) == expected
170 changes: 167 additions & 3 deletions posthog/test/test_decorators.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
from posthog.decorators import cached_by_filters
from datetime import datetime
from freezegun import freeze_time
from posthog.decorators import cached_by_filters, is_stale

from django.core.cache import cache

from rest_framework.test import APIRequestFactory
from rest_framework.viewsets import GenericViewSet
from rest_framework.response import Response
from posthog.models.filters.filter import Filter
from posthog.models.filters.path_filter import PathFilter
from posthog.models.filters.retention_filter import RetentionFilter
from posthog.models.filters.stickiness_filter import StickinessFilter

from posthog.test.base import APIBaseTest
from posthog.test.base import APIBaseTest, BaseTest
from posthog.api import router

factory = APIRequestFactory()
Expand All @@ -22,7 +28,7 @@ def calculate_with_filters(self, request):
return {"result": "bla"}


class TestDecorators(APIBaseTest):
class TestCachedByFiltersDecorator(APIBaseTest):
def setUp(self) -> None:
cache.clear()

Expand Down Expand Up @@ -61,3 +67,161 @@ def test_cache_bypass_with_invalidation_key_param(self) -> None:
response = self.client.get(f"/api/dummy", data={"cache_invalidation_key": "abc"}).json()

assert response["is_cached"] is False

def test_discards_stale_response(self) -> None:
    """A response cached two days earlier must not be served from the cache."""
    endpoint = "/api/dummy"

    # First request populates the cache.
    with freeze_time("2023-02-08T12:05:23Z"):
        self.client.get(endpoint).json()

    # No filters are needed here, since -7d with a daily interval is the default.
    with freeze_time("2023-02-10T12:00:00Z"):
        payload = self.client.get(endpoint).json()
        assert payload["is_cached"] is False


class TestIsStaleHelper(BaseTest):
    """Boundary tests for ``is_stale`` against a result last refreshed at 2023-02-08T12:05:23Z."""

    # Shared cached package; every test compares a frozen "now" against this last_refresh.
    cached_response = {"last_refresh": datetime.fromisoformat("2023-02-08T12:05:23+00:00"), "result": "bla"}

    # --- plain filters, one fresh/stale pair per interval ---

    def test_keeps_fresh_hourly_result(self) -> None:
        with freeze_time("2023-02-08T12:59:59Z"):
            hourly_filter = Filter(data={"interval": "hour"})

            assert is_stale(self.team, hourly_filter, self.cached_response) is False

    def test_discards_stale_hourly_result(self) -> None:
        with freeze_time("2023-02-08T13:00:00Z"):
            hourly_filter = Filter(data={"interval": "hour"})

            assert is_stale(self.team, hourly_filter, self.cached_response) is True

    def test_keeps_fresh_daily_result(self) -> None:
        with freeze_time("2023-02-08T23:59:59Z"):
            daily_filter = Filter(data={"interval": "day"})

            assert is_stale(self.team, daily_filter, self.cached_response) is False

    def test_discards_stale_daily_result(self) -> None:
        with freeze_time("2023-02-09T00:00:00Z"):
            daily_filter = Filter(data={"interval": "day"})

            assert is_stale(self.team, daily_filter, self.cached_response) is True

    def test_keeps_fresh_weekly_result(self) -> None:
        with freeze_time("2023-02-11T23:59:59Z"):
            weekly_filter = Filter(data={"interval": "week"})

            assert is_stale(self.team, weekly_filter, self.cached_response) is False

    def test_discards_stale_weekly_result(self) -> None:
        with freeze_time("2023-02-12T00:00:00Z"):
            weekly_filter = Filter(data={"interval": "week"})

            assert is_stale(self.team, weekly_filter, self.cached_response) is True

    def test_keeps_fresh_monthly_result(self) -> None:
        with freeze_time("2023-02-28T23:59:59Z"):
            monthly_filter = Filter(data={"interval": "month"})

            assert is_stale(self.team, monthly_filter, self.cached_response) is False

    def test_discards_stale_monthly_result(self) -> None:
        with freeze_time("2023-03-01T00:00:00Z"):
            monthly_filter = Filter(data={"interval": "month"})

            assert is_stale(self.team, monthly_filter, self.cached_response) is True

    # --- explicit date ranges ---

    def test_keeps_fresh_result_from_fixed_range(self) -> None:
        # The whole range lies before last_refresh, so no time freezing is used here.
        fixed_range_filter = Filter(data={"interval": "day", "date_from": "2000-01-01", "date_to": "2000-01-10"})

        assert is_stale(self.team, fixed_range_filter, self.cached_response) is False

    def test_keeps_fresh_result_with_date_to_in_future(self) -> None:
        with freeze_time("2023-02-08T23:59:59Z"):
            future_filter = Filter(data={"interval": "day", "date_to": "2999-01-01"})

            assert is_stale(self.team, future_filter, self.cached_response) is False

    # --- other filter types ---

    def test_keeps_fresh_stickiness_result(self) -> None:
        with freeze_time("2023-02-08T23:59:59Z"):
            stickiness_filter = StickinessFilter(data={}, team=self.team)

            assert is_stale(self.team, stickiness_filter, self.cached_response) is False

    def test_discards_stale_stickiness_result(self) -> None:
        with freeze_time("2023-02-09T00:00:00Z"):
            stickiness_filter = StickinessFilter(data={}, team=self.team)

            assert is_stale(self.team, stickiness_filter, self.cached_response) is True

    def test_keeps_fresh_path_result(self) -> None:
        with freeze_time("2023-02-08T23:59:59Z"):
            path_filter = PathFilter()

            assert is_stale(self.team, path_filter, self.cached_response) is False

    def test_discards_stale_path_result(self) -> None:
        with freeze_time("2023-02-09T00:00:00Z"):
            path_filter = PathFilter()

            assert is_stale(self.team, path_filter, self.cached_response) is True

    def test_keeps_fresh_retention_hourly_result(self) -> None:
        with freeze_time("2023-02-08T12:59:59Z"):
            retention_filter = RetentionFilter(data={"period": "Hour"})

            assert is_stale(self.team, retention_filter, self.cached_response) is False

    def test_discards_stale_retention_hourly_result(self) -> None:
        with freeze_time("2023-02-08T13:00:00Z"):
            retention_filter = RetentionFilter(data={"period": "Hour"})

            assert is_stale(self.team, retention_filter, self.cached_response) is True

    def test_keeps_fresh_retention_result(self) -> None:
        with freeze_time("2023-02-08T23:59:59Z"):
            retention_filter = RetentionFilter()

            assert is_stale(self.team, retention_filter, self.cached_response) is False

    def test_discards_stale_retention_result(self) -> None:
        with freeze_time("2023-02-09T00:00:00Z"):
            retention_filter = RetentionFilter()

            assert is_stale(self.team, retention_filter, self.cached_response) is True

0 comments on commit 3fb024b

Please sign in to comment.