Skip to content

Commit

Permalink
man
Browse files Browse the repository at this point in the history
  • Loading branch information
pauldambra committed Sep 21, 2023
1 parent 92004c9 commit 1f6bad1
Show file tree
Hide file tree
Showing 9 changed files with 632 additions and 1,558 deletions.
49 changes: 2 additions & 47 deletions ee/session_recordings/session_recording_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@
from sentry_sdk import capture_exception, capture_message

from posthog import settings
from posthog.event_usage import report_team_action
from posthog.session_recordings.models.metadata import PersistedRecordingV1
from posthog.session_recordings.models.session_recording import SessionRecording
from posthog.session_recordings.session_recording_helpers import compress_to_string, decompress
from posthog.session_recordings.session_recording_helpers import decompress
from posthog.storage import object_storage

logger = structlog.get_logger(__name__)
Expand Down Expand Up @@ -60,8 +59,6 @@ def persist_recording(recording_id: str, team_id: int) -> None:

logger.info("Persisting recording: init", recording_id=recording_id, team_id=team_id)

start_time = timezone.now()

if not settings.OBJECT_STORAGE_ENABLED:
return

Expand Down Expand Up @@ -104,49 +101,7 @@ def persist_recording(recording_id: str, team_id: int) -> None:
logger.info("Persisting recording: done!", recording_id=recording_id, team_id=team_id, source="s3")
return
else:
# TODO this can be removed when we're happy with the new storage version
with SNAPSHOT_PERSIST_TIME_HISTOGRAM.labels(source="ClickHouse").time():
recording.load_snapshots(100_000) # TODO: Paginate rather than hardcode a limit

content: PersistedRecordingV1 = {
"version": "2022-12-22",
"distinct_id": recording.distinct_id,
"snapshot_data_by_window_id": recording.snapshot_data_by_window_id,
}

string_content = json.dumps(content, default=str)
string_content = compress_to_string(string_content)

logger.info("Persisting recording: writing to S3...", recording_id=recording_id, team_id=team_id)

try:
object_path = recording.build_object_storage_path("2022-12-22")
object_storage.write(object_path, string_content.encode("utf-8"))
recording.object_storage_path = object_path
recording.save()

report_team_action(
recording.team,
"session recording persisted",
{"total_time_ms": (timezone.now() - start_time).total_seconds() * 1000},
)

logger.info(
"Persisting recording: done!", recording_id=recording_id, team_id=team_id, source="ClickHouse"
)
except object_storage.ObjectStorageError as ose:
capture_exception(ose)
report_team_action(
recording.team,
"session recording persist failed",
{"total_time_ms": (timezone.now() - start_time).total_seconds() * 1000, "error": str(ose)},
)
logger.error(
"session_recording.object-storage-error",
recording_id=recording.session_id,
exception=ose,
exc_info=True,
)
raise NotImplementedError("ClickHouse backed recordings are not supported")


def load_persisted_recording(recording: SessionRecording) -> Optional[PersistedRecordingV1]:
Expand Down
99 changes: 5 additions & 94 deletions ee/session_recordings/test/test_session_recording_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@
persist_recording,
save_recording_with_new_content,
)
from posthog.models.signals import mute_selected_signals
from posthog.session_recordings.models.session_recording import SessionRecording
from posthog.session_recordings.models.session_recording_playlist import SessionRecordingPlaylist
from posthog.session_recordings.models.session_recording_playlist_item import SessionRecordingPlaylistItem
from posthog.session_recordings.queries.test.session_replay_sql import produce_replay_summary
from posthog.session_recordings.test.test_factory import create_session_recording_events
from posthog.settings import (
Expand Down Expand Up @@ -64,8 +63,8 @@ def create_snapshot(self, session_id, timestamp):
session_id=session_id,
window_id="window_1",
snapshots=[snapshot],
use_recording_table=True,
use_replay_table=False,
use_recording_table=False,
use_replay_table=True,
)

def test_does_not_persist_too_recent_recording(self):
Expand All @@ -78,62 +77,6 @@ def test_does_not_persist_too_recent_recording(self):

assert not recording.object_storage_path

def test_persists_recording_with_original_version_when_not_in_blob_storage(self):
two_minutes_ago = (datetime.now() - timedelta(minutes=2)).replace(tzinfo=timezone.utc)
with freeze_time(two_minutes_ago):
recording = SessionRecording.objects.create(
team=self.team, session_id=f"test_persists_recording-s1-{uuid4()}"
)

self.create_snapshot(recording.session_id, recording.created_at - timedelta(hours=48))
self.create_snapshot(recording.session_id, recording.created_at - timedelta(hours=46))

produce_replay_summary(
session_id=recording.session_id,
team_id=self.team.pk,
first_timestamp=(recording.created_at - timedelta(hours=48)).isoformat(),
last_timestamp=(recording.created_at - timedelta(hours=46)).isoformat(),
distinct_id="distinct_id_1",
first_url="https://app.posthog.com/my-url",
)

persist_recording(recording.session_id, recording.team_id)
recording.refresh_from_db()

assert (
recording.object_storage_path
== f"session_recordings_lts/team-{self.team.pk}/session-{recording.session_id}"
)
assert recording.start_time == recording.created_at - timedelta(hours=48)
assert recording.end_time == recording.created_at - timedelta(hours=46)

assert recording.distinct_id == "distinct_id_1"
assert recording.duration == 7200
assert recording.click_count == 0
assert recording.keypress_count == 0
assert recording.start_url == "https://app.posthog.com/my-url"

assert load_persisted_recording(recording) == {
"version": "2022-12-22",
"distinct_id": "distinct_id_1",
"snapshot_data_by_window_id": {
"window_1": [
{
"timestamp": (recording.created_at - timedelta(hours=48)).timestamp() * 1000,
"has_full_snapshot": 1,
"type": 2,
"data": {"source": 0, "href": long_url},
},
{
"timestamp": (recording.created_at - timedelta(hours=46)).timestamp() * 1000,
"has_full_snapshot": 1,
"type": 2,
"data": {"source": 0, "href": long_url},
},
]
},
}

def test_can_build_different_object_storage_paths(self) -> None:
produce_replay_summary(
session_id="test_can_build_different_object_storage_paths-s1",
Expand Down Expand Up @@ -205,42 +148,10 @@ def test_persists_recording_from_blob_ingested_storage(self):
f"{recording.build_object_storage_path('2023-08-01')}/c",
]

@patch("ee.session_recordings.session_recording_extensions.report_team_action")
def test_persist_tracks_correct_to_posthog(self, mock_capture):
two_minutes_ago = (datetime.now() - timedelta(minutes=2)).replace(tzinfo=timezone.utc)

with freeze_time(two_minutes_ago):
playlist = SessionRecordingPlaylist.objects.create(team=self.team, name="playlist", created_by=self.user)
recording = SessionRecording.objects.create(
team=self.team, session_id=f"test_persist_tracks_correct_to_posthog-s1-{uuid4()}"
)
SessionRecordingPlaylistItem.objects.create(playlist=playlist, recording=recording)

self.create_snapshot(recording.session_id, recording.created_at - timedelta(hours=48))
self.create_snapshot(recording.session_id, recording.created_at - timedelta(hours=46))

produce_replay_summary(
session_id=recording.session_id,
team_id=self.team.pk,
first_timestamp=(recording.created_at - timedelta(hours=48)).isoformat(),
last_timestamp=(recording.created_at - timedelta(hours=46)).isoformat(),
distinct_id="distinct_id_1",
first_url="https://app.posthog.com/my-url",
)

persist_recording(recording.session_id, recording.team_id)

assert mock_capture.call_args_list[0][0][0] == recording.team
assert mock_capture.call_args_list[0][0][1] == "session recording persisted"

for x in [
"total_time_ms",
]:
assert mock_capture.call_args_list[0][0][2][x] > 0

@patch("ee.session_recordings.session_recording_extensions.object_storage.write")
def test_can_save_content_to_new_location(self, mock_write: MagicMock):
with self.settings(OBJECT_STORAGE_SESSION_RECORDING_BLOB_INGESTION_FOLDER=TEST_BUCKET):
# mute selected signals so the post create signal does not try to persist the recording
with self.settings(OBJECT_STORAGE_SESSION_RECORDING_BLOB_INGESTION_FOLDER=TEST_BUCKET), mute_selected_signals():
session_id = f"{uuid4()}"

recording = SessionRecording.objects.create(
Expand Down
6 changes: 4 additions & 2 deletions ee/session_recordings/test/test_session_recording_playlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,16 @@ def test_filters_based_on_params(self):
assert len(results) == 1
assert results[0]["short_id"] == playlist3.short_id

def test_get_pinned_recordings_for_playlist(self):
@patch("ee.session_recordings.session_recording_extensions.object_storage.copy_objects")
def test_get_pinned_recordings_for_playlist(self, mock_copy_objects: MagicMock) -> None:
mock_copy_objects.return_value = 2

playlist = SessionRecordingPlaylist.objects.create(team=self.team, name="playlist", created_by=self.user)

session_one = f"test_fetch_playlist_recordings-session1-{uuid4()}"
session_two = f"test_fetch_playlist_recordings-session2-{uuid4()}"
three_days_ago = (datetime.now() - timedelta(days=3)).replace(tzinfo=timezone.utc)

# can't immediately switch playlists to replay table
create_session_recording_events(
team_id=self.team.id,
distinct_id="123",
Expand Down
19 changes: 7 additions & 12 deletions posthog/session_recordings/models/session_recording.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
from typing import Any, List, Optional, Literal

from django.conf import settings
from django.db import models
from django.db.models import Count
from django.dispatch import receiver

from posthog.celery import ee_persist_single_recording
from posthog.models.person.person import Person
from posthog.models.signals import mutable_receiver
from posthog.models.team.team import Team
from posthog.models.utils import UUIDModel
from posthog.session_recordings.models.metadata import (
DecompressedRecordingData,
RecordingMatchingEvents,
RecordingMetadata,
)
from posthog.session_recordings.models.session_recording_event import SessionRecordingViewed
from posthog.models.team.team import Team
from posthog.models.utils import UUIDModel
from posthog.session_recordings.queries.session_replay_events import SessionReplayEvents
from django.conf import settings


class SessionRecording(UUIDModel):
Expand Down Expand Up @@ -98,19 +98,14 @@ def load_metadata(self) -> bool:
return True

def load_snapshots(self, limit=20, offset=0) -> None:
from posthog.session_recordings.queries.session_recording_events import SessionRecordingEvents

if self._snapshots:
return

if self.object_storage_path:
self.load_object_data()
else:
snapshots = SessionRecordingEvents(
team=self.team, session_recording_id=self.session_id, recording_start_time=self.start_time
).get_snapshots(limit, offset)

self._snapshots = snapshots
# TODO this can be removed
raise NotImplementedError("Clickhouse backed snapshots are not supported")

def load_object_data(self) -> None:
"""
Expand Down Expand Up @@ -245,7 +240,7 @@ def set_start_url_from_urls(self, urls: Optional[List[str]] = None, first_url: O
self.start_url = url.split("?")[0][:512] if url else None


@receiver(models.signals.post_save, sender=SessionRecording)
@mutable_receiver(models.signals.post_save, sender=SessionRecording)
def attempt_persist_recording(sender, instance: SessionRecording, created: bool, **kwargs):
if created:
ee_persist_single_recording.delay(instance.session_id, instance.team_id)
87 changes: 0 additions & 87 deletions posthog/session_recordings/queries/session_recording_events.py

This file was deleted.

Loading

0 comments on commit 1f6bad1

Please sign in to comment.