fix: hot path is cooler now #21888

Merged · 3 commits · Apr 26, 2024
Changes from 2 commits
45 changes: 34 additions & 11 deletions posthog/session_recordings/queries/session_replay_events.py
@@ -1,7 +1,9 @@
from datetime import datetime, timedelta
from typing import Optional

import pytz
from django.conf import settings
from django.core.cache import cache

from posthog.clickhouse.client import sync_execute
from posthog.cloud_utils import is_cloud
@@ -15,26 +17,47 @@
)


def seconds_until_midnight():
    now = datetime.now(pytz.timezone("UTC"))
    midnight = (now + timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
    difference = midnight - now
    return difference.seconds


class SessionReplayEvents:
    def exists(self, session_id: str, team: Team) -> bool:
        # TODO we could cache this result when its result is True.
        cache_key = f"summarize_recording_existence_team_{team.pk}_id_{session_id}"
        cached_response = cache.get(cache_key)
        if isinstance(cached_response, bool):
            return cached_response

        # Once we know that a session exists we don't need to check again (until the end of the day, since a TTL might apply)
        existence = self._check_exists_within_days(ttl_days(team), session_id, team) or self._check_exists_within_days(
            370, session_id, team
        )

        if existence:
            # let's be cautious and not cache non-existence,
            # in case we manage to check existence just before the first event hits ClickHouse;
            # that should be impossible, but cache invalidation is hard etc etc
            cache.set(cache_key, existence, timeout=seconds_until_midnight())
        return existence

    @staticmethod
    def _check_exists_within_days(days: int, session_id: str, team: Team) -> bool:
        result = sync_execute(
            """
-           SELECT count(1)
+           SELECT count()
            FROM session_replay_events
-           WHERE team_id = %(team_id)s
+           PREWHERE team_id = %(team_id)s
            AND session_id = %(session_id)s
            -- we should check for the `ttl_days(team)` TTL here,
            -- but for a shared/pinned recording
            -- the TTL effectively becomes 1 year
            -- and we don't know which we're dealing with
-           AND min_first_timestamp >= now() - INTERVAL 370 DAY
+           AND min_first_timestamp >= now() - INTERVAL %(days)s DAY
            AND min_first_timestamp <= now()
            """,
            {
                "team_id": team.pk,
                "session_id": session_id,
-               "recording_ttl_days": ttl_days(team),
+               "days": days,
            },
        )
        return result[0][0] > 0
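
Taken together, the new `exists` path is a cache-aside lookup whose positive answers are kept until UTC midnight. Below is a minimal standalone sketch of that pattern (not part of the diff), assuming a configured Django cache; the function names `seconds_until_utc_midnight` and `cached_exists` are illustrative only.

```python
from datetime import datetime, timedelta, timezone
from typing import Callable

from django.core.cache import cache


def seconds_until_utc_midnight() -> int:
    # Remaining whole seconds in the current UTC day, mirroring seconds_until_midnight above.
    now = datetime.now(timezone.utc)
    midnight = (now + timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
    return (midnight - now).seconds


def cached_exists(cache_key: str, check: Callable[[], bool]) -> bool:
    # Cache-aside: reuse a previously cached boolean answer if there is one.
    hit = cache.get(cache_key)
    if isinstance(hit, bool):
        return hit
    exists = check()
    if exists:
        # Only positive answers are cached, so a recording whose first event
        # arrives in ClickHouse moments later cannot be hidden by a stale "False".
        cache.set(cache_key, exists, timeout=seconds_until_utc_midnight())
    return exists
```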
@@ -144,7 +167,6 @@ def get_events(


def ttl_days(team: Team) -> int:
-   ttl_days = (get_instance_setting("RECORDINGS_TTL_WEEKS") or 3) * 7
    if is_cloud():
        # NOTE: We use Playlists as a proxy to see if they are subbed to Recordings
        is_paid = team.organization.is_feature_available(AvailableFeature.RECORDINGS_PLAYLISTS)
@@ -155,5 +177,6 @@ def ttl_days(team: Team) -> int:

        if days_since_blob_ingestion < ttl_days:
            ttl_days = days_since_blob_ingestion

+   else:
+       ttl_days = (get_instance_setting("RECORDINGS_TTL_WEEKS") or 3) * 7
    return ttl_days
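
A side note on the midnight-based cache timeout above: entries written shortly before 00:00 UTC expire almost immediately, so a retention window that shifts at the date boundary is not masked by a cached answer. A small worked example, illustrative only and not part of the PR:

```python
from datetime import datetime, timedelta, timezone

# One minute before UTC midnight the helper's arithmetic yields 60, so an
# existence result cached at 23:59:00 is re-checked on the next day, when
# the team's day-based retention window has shifted.
now = datetime(2024, 4, 26, 23, 59, 0, tzinfo=timezone.utc)
midnight = (now + timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
assert (midnight - now).seconds == 60
```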