Skip to content

Commit

Permalink
fix: lts playback for blobby versioned files (#17545)
Browse files Browse the repository at this point in the history
* fix: lts playback for blobby versioned files

* and when loading snapshots too
  • Loading branch information
pauldambra authored Sep 20, 2023
1 parent fd10852 commit 907e87d
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 5 deletions.
17 changes: 13 additions & 4 deletions posthog/session_recordings/session_recording_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,13 @@ def _snapshots_v2(self, request: request.Request):

if not source:
sources: List[dict] = []
blob_prefix = recording.build_blob_ingestion_storage_path()
blob_keys = object_storage.list_objects(blob_prefix)

if not blob_keys and recording.storage_version == "2023-08-01":
if recording.object_storage_path and recording.storage_version == "2023-08-01":
blob_prefix = recording.object_storage_path
blob_keys = object_storage.list_objects(cast(str, blob_prefix))
else:
blob_prefix = recording.build_blob_ingestion_storage_path()
blob_keys = object_storage.list_objects(blob_prefix)

if blob_keys:
for full_key in blob_keys:
Expand Down Expand Up @@ -333,7 +334,15 @@ def _snapshots_v2(self, request: request.Request):
raise exceptions.ValidationError("Must provide a snapshot file blob key")

# very short-lived pre-signed URL
file_key = f"session_recordings/team_id/{self.team.pk}/session_id/{recording.session_id}/data/{blob_key}"
if recording.object_storage_path:
if recording.storage_version == "2023-08-01":
file_key = f"{recording.object_storage_path}/{blob_key}"
else:
file_key = recording.object_storage_path
else:
file_key = (
f"session_recordings/team_id/{self.team.pk}/session_id/{recording.session_id}/data/{blob_key}"
)
url = object_storage.get_presigned_url(file_key, expiration=60)
if not url:
raise exceptions.NotFound("Snapshot file not found")
Expand Down
121 changes: 121 additions & 0 deletions posthog/session_recordings/test/test_lts_session_recordings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import uuid
from typing import List
from unittest.mock import patch, MagicMock, call, Mock

from posthog.models import Team
from posthog.session_recordings.models.session_recording import SessionRecording
from posthog.test.base import APIBaseTest, ClickhouseTestMixin, QueryMatchingTest


class TestSessionRecordings(APIBaseTest, ClickhouseTestMixin, QueryMatchingTest):
def setUp(self):
super().setUp()

# Create a new team each time to ensure no clashing between tests
self.team = Team.objects.create(organization=self.organization, name="New Team")

@patch("posthog.session_recordings.session_recording_api.object_storage.list_objects")
def test_2023_08_01_version_stored_snapshots_can_be_gathered(self, mock_list_objects: MagicMock) -> None:
session_id = str(uuid.uuid4())
lts_storage_path = "purposefully/not/what/we/would/calculate/to/prove/this/is/used"

def list_objects_func(path: str) -> List[str]:
# this mock simulates a recording whose blob storage has been deleted by TTL
# but which has been stored in LTS blob storage
if path == lts_storage_path:
return [
f"{lts_storage_path}/1-2",
f"{lts_storage_path}/3-4",
]
else:
return []

mock_list_objects.side_effect = list_objects_func

# this recording was shared several days ago, it has been stored in LTS storage
SessionRecording.objects.create(
team=self.team,
session_id=session_id,
storage_version="2023-08-01",
object_storage_path=lts_storage_path,
)

response = self.client.get(f"/api/projects/{self.team.id}/session_recordings/{session_id}/snapshots?version=2")
response_data = response.json()

assert mock_list_objects.call_args_list == [
call(lts_storage_path),
]

assert response_data == {
"sources": [
{
"blob_key": "1-2",
"source": "blob",
"start_timestamp": "1970-01-01T00:00:00.001000Z",
"end_timestamp": "1970-01-01T00:00:00.002000Z",
},
{
"blob_key": "3-4",
"source": "blob",
"start_timestamp": "1970-01-01T00:00:00.003000Z",
"end_timestamp": "1970-01-01T00:00:00.004000Z",
},
],
}

@patch("posthog.session_recordings.session_recording_api.requests.get")
@patch("posthog.session_recordings.session_recording_api.object_storage.get_presigned_url")
@patch("posthog.session_recordings.session_recording_api.object_storage.list_objects")
def test_2023_08_01_version_stored_snapshots_can_be_loaded(
self, mock_list_objects: MagicMock, mock_get_presigned_url: MagicMock, mock_requests: MagicMock
) -> None:
session_id = str(uuid.uuid4())
lts_storage_path = "purposefully/not/what/we/would/calculate/to/prove/this/is/used"

def list_objects_func(path: str) -> List[str]:
# this mock simulates a recording whose blob storage has been deleted by TTL
# but which has been stored in LTS blob storage
if path == lts_storage_path:
return [
f"{lts_storage_path}/1-2",
f"{lts_storage_path}/3-4",
]
else:
return []

mock_list_objects.side_effect = list_objects_func
mock_get_presigned_url.return_value = "https://example.com"

mock_response = Mock()
mock_response.raise_for_status.return_value = None
mock_response.raw = "the file contents"

# Set up the mock to work as a context manager
mock_requests.return_value.__enter__.return_value = mock_response
mock_requests.return_value.__exit__.return_value = None

SessionRecording.objects.create(
team=self.team,
session_id=session_id,
storage_version="2023-08-01",
object_storage_path=lts_storage_path,
)

query_parameters = [
"source=blob",
"version=2",
"blob_key=1-2",
]
response = self.client.get(
f"/api/projects/{self.team.id}/session_recordings/{session_id}/snapshots?{'&'.join(query_parameters)}"
)
response_data = response.content.decode("utf-8")

assert mock_list_objects.call_args_list == []

assert mock_get_presigned_url.call_args_list == [
call(f"{lts_storage_path}/1-2", expiration=60),
]

assert response_data == "the file contents"
1 change: 0 additions & 1 deletion posthog/session_recordings/test/test_session_recordings.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,6 @@ def list_objects_func(path: str) -> List[str]:
]
}
assert mock_list_objects.call_args_list == [
call(f"session_recordings/team_id/{self.team.pk}/session_id/{session_id}/data"),
call("an lts stored object path"),
]

Expand Down

0 comments on commit 907e87d

Please sign in to comment.