From bd35dcd4177394afc80996705f99c6300b077edf Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Wed, 20 Sep 2023 13:35:00 +0100 Subject: [PATCH] fix: lts playback for blobby versioned files (#17545) * fix: lts playback for blobby versioned files * and when loading snapshots too --- .../session_recording_api.py | 17 ++- .../test/test_lts_session_recordings.py | 121 ++++++++++++++++++ .../test/test_session_recordings.py | 1 - 3 files changed, 134 insertions(+), 5 deletions(-) create mode 100644 posthog/session_recordings/test/test_lts_session_recordings.py diff --git a/posthog/session_recordings/session_recording_api.py b/posthog/session_recordings/session_recording_api.py index 41a668083b534..81db3cd7a69f3 100644 --- a/posthog/session_recordings/session_recording_api.py +++ b/posthog/session_recordings/session_recording_api.py @@ -273,12 +273,13 @@ def _snapshots_v2(self, request: request.Request): if not source: sources: List[dict] = [] - blob_prefix = recording.build_blob_ingestion_storage_path() - blob_keys = object_storage.list_objects(blob_prefix) - if not blob_keys and recording.storage_version == "2023-08-01": + if recording.object_storage_path and recording.storage_version == "2023-08-01": blob_prefix = recording.object_storage_path blob_keys = object_storage.list_objects(cast(str, blob_prefix)) + else: + blob_prefix = recording.build_blob_ingestion_storage_path() + blob_keys = object_storage.list_objects(blob_prefix) if blob_keys: for full_key in blob_keys: @@ -333,7 +334,15 @@ def _snapshots_v2(self, request: request.Request): raise exceptions.ValidationError("Must provide a snapshot file blob key") # very short-lived pre-signed URL - file_key = f"session_recordings/team_id/{self.team.pk}/session_id/{recording.session_id}/data/{blob_key}" + if recording.object_storage_path: + if recording.storage_version == "2023-08-01": + file_key = f"{recording.object_storage_path}/{blob_key}" + else: + file_key = recording.object_storage_path + else: + file_key = ( + f"session_recordings/team_id/{self.team.pk}/session_id/{recording.session_id}/data/{blob_key}" + ) url = object_storage.get_presigned_url(file_key, expiration=60) if not url: raise exceptions.NotFound("Snapshot file not found") diff --git a/posthog/session_recordings/test/test_lts_session_recordings.py b/posthog/session_recordings/test/test_lts_session_recordings.py new file mode 100644 index 0000000000000..55959a20738cd --- /dev/null +++ b/posthog/session_recordings/test/test_lts_session_recordings.py @@ -0,0 +1,121 @@ +import uuid +from typing import List +from unittest.mock import patch, MagicMock, call, Mock + +from posthog.models import Team +from posthog.session_recordings.models.session_recording import SessionRecording +from posthog.test.base import APIBaseTest, ClickhouseTestMixin, QueryMatchingTest + + +class TestSessionRecordings(APIBaseTest, ClickhouseTestMixin, QueryMatchingTest): + def setUp(self): + super().setUp() + + # Create a new team each time to ensure no clashing between tests + self.team = Team.objects.create(organization=self.organization, name="New Team") + + @patch("posthog.session_recordings.session_recording_api.object_storage.list_objects") + def test_2023_08_01_version_stored_snapshots_can_be_gathered(self, mock_list_objects: MagicMock) -> None: + session_id = str(uuid.uuid4()) + lts_storage_path = "purposefully/not/what/we/would/calculate/to/prove/this/is/used" + + def list_objects_func(path: str) -> List[str]: + # this mock simulates a recording whose blob storage has been deleted by TTL + # but which has been stored in LTS blob storage + if path == lts_storage_path: + return [ + f"{lts_storage_path}/1-2", + f"{lts_storage_path}/3-4", + ] + else: + return [] + + mock_list_objects.side_effect = list_objects_func + + # this recording was shared several days ago, it has been stored in LTS storage + SessionRecording.objects.create( + team=self.team, + session_id=session_id, + storage_version="2023-08-01", + object_storage_path=lts_storage_path, + ) + + response = self.client.get(f"/api/projects/{self.team.id}/session_recordings/{session_id}/snapshots?version=2") + response_data = response.json() + + assert mock_list_objects.call_args_list == [ + call(lts_storage_path), + ] + + assert response_data == { + "sources": [ + { + "blob_key": "1-2", + "source": "blob", + "start_timestamp": "1970-01-01T00:00:00.001000Z", + "end_timestamp": "1970-01-01T00:00:00.002000Z", + }, + { + "blob_key": "3-4", + "source": "blob", + "start_timestamp": "1970-01-01T00:00:00.003000Z", + "end_timestamp": "1970-01-01T00:00:00.004000Z", + }, + ], + } + + @patch("posthog.session_recordings.session_recording_api.requests.get") + @patch("posthog.session_recordings.session_recording_api.object_storage.get_presigned_url") + @patch("posthog.session_recordings.session_recording_api.object_storage.list_objects") + def test_2023_08_01_version_stored_snapshots_can_be_loaded( + self, mock_list_objects: MagicMock, mock_get_presigned_url: MagicMock, mock_requests: MagicMock + ) -> None: + session_id = str(uuid.uuid4()) + lts_storage_path = "purposefully/not/what/we/would/calculate/to/prove/this/is/used" + + def list_objects_func(path: str) -> List[str]: + # this mock simulates a recording whose blob storage has been deleted by TTL + # but which has been stored in LTS blob storage + if path == lts_storage_path: + return [ + f"{lts_storage_path}/1-2", + f"{lts_storage_path}/3-4", + ] + else: + return [] + + mock_list_objects.side_effect = list_objects_func + mock_get_presigned_url.return_value = "https://example.com" + + mock_response = Mock() + mock_response.raise_for_status.return_value = None + mock_response.raw = "the file contents" + + # Set up the mock to work as a context manager + mock_requests.return_value.__enter__.return_value = mock_response + mock_requests.return_value.__exit__.return_value = None + + SessionRecording.objects.create( + team=self.team, + session_id=session_id, + storage_version="2023-08-01", + object_storage_path=lts_storage_path, + ) + + query_parameters = [ + "source=blob", + "version=2", + "blob_key=1-2", + ] + response = self.client.get( + f"/api/projects/{self.team.id}/session_recordings/{session_id}/snapshots?{'&'.join(query_parameters)}" + ) + response_data = response.content.decode("utf-8") + + assert mock_list_objects.call_args_list == [] + + assert mock_get_presigned_url.call_args_list == [ + call(f"{lts_storage_path}/1-2", expiration=60), + ] + + assert response_data == "the file contents" diff --git a/posthog/session_recordings/test/test_session_recordings.py b/posthog/session_recordings/test/test_session_recordings.py index c1467d6616ddf..a535fba873f09 100644 --- a/posthog/session_recordings/test/test_session_recordings.py +++ b/posthog/session_recordings/test/test_session_recordings.py @@ -679,7 +679,6 @@ def list_objects_func(path: str) -> List[str]: ] } assert mock_list_objects.call_args_list == [ - call(f"session_recordings/team_id/{self.team.pk}/session_id/{session_id}/data"), call("an lts stored object path"), ]