Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: lts playback for blobby versioned files #17545

Merged
merged 2 commits into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions posthog/session_recordings/session_recording_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,13 @@ def _snapshots_v2(self, request: request.Request):

if not source:
sources: List[dict] = []
blob_prefix = recording.build_blob_ingestion_storage_path()
blob_keys = object_storage.list_objects(blob_prefix)

if not blob_keys and recording.storage_version == "2023-08-01":
if recording.object_storage_path and recording.storage_version == "2023-08-01":
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this leaves us broken for older version files but we're following up with that. those files need processing anyway before they can be used

blob_prefix = recording.object_storage_path
blob_keys = object_storage.list_objects(cast(str, blob_prefix))
else:
blob_prefix = recording.build_blob_ingestion_storage_path()
blob_keys = object_storage.list_objects(blob_prefix)

if blob_keys:
for full_key in blob_keys:
Expand Down Expand Up @@ -333,7 +334,15 @@ def _snapshots_v2(self, request: request.Request):
raise exceptions.ValidationError("Must provide a snapshot file blob key")

# very short-lived pre-signed URL
file_key = f"session_recordings/team_id/{self.team.pk}/session_id/{recording.session_id}/data/{blob_key}"
if recording.object_storage_path:
if recording.storage_version == "2023-08-01":
file_key = f"{recording.object_storage_path}/{blob_key}"
else:
file_key = recording.object_storage_path
else:
file_key = (
f"session_recordings/team_id/{self.team.pk}/session_id/{recording.session_id}/data/{blob_key}"
)
url = object_storage.get_presigned_url(file_key, expiration=60)
if not url:
raise exceptions.NotFound("Snapshot file not found")
Expand Down
121 changes: 121 additions & 0 deletions posthog/session_recordings/test/test_lts_session_recordings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import uuid
from typing import List
from unittest.mock import patch, MagicMock, call, Mock

from posthog.models import Team
from posthog.session_recordings.models.session_recording import SessionRecording
from posthog.test.base import APIBaseTest, ClickhouseTestMixin, QueryMatchingTest


class TestSessionRecordings(APIBaseTest, ClickhouseTestMixin, QueryMatchingTest):
def setUp(self):
super().setUp()

# Create a new team each time to ensure no clashing between tests
self.team = Team.objects.create(organization=self.organization, name="New Team")

@patch("posthog.session_recordings.session_recording_api.object_storage.list_objects")
def test_2023_08_01_version_stored_snapshots_can_be_gathered(self, mock_list_objects: MagicMock) -> None:
session_id = str(uuid.uuid4())
lts_storage_path = "purposefully/not/what/we/would/calculate/to/prove/this/is/used"

def list_objects_func(path: str) -> List[str]:
# this mock simulates a recording whose blob storage has been deleted by TTL
# but which has been stored in LTS blob storage
if path == lts_storage_path:
return [
f"{lts_storage_path}/1-2",
f"{lts_storage_path}/3-4",
]
else:
return []

mock_list_objects.side_effect = list_objects_func

# this recording was shared several days ago, it has been stored in LTS storage
SessionRecording.objects.create(
team=self.team,
session_id=session_id,
storage_version="2023-08-01",
object_storage_path=lts_storage_path,
)

response = self.client.get(f"/api/projects/{self.team.id}/session_recordings/{session_id}/snapshots?version=2")
response_data = response.json()

assert mock_list_objects.call_args_list == [
call(lts_storage_path),
]

assert response_data == {
"sources": [
{
"blob_key": "1-2",
"source": "blob",
"start_timestamp": "1970-01-01T00:00:00.001000Z",
"end_timestamp": "1970-01-01T00:00:00.002000Z",
},
{
"blob_key": "3-4",
"source": "blob",
"start_timestamp": "1970-01-01T00:00:00.003000Z",
"end_timestamp": "1970-01-01T00:00:00.004000Z",
},
],
}

@patch("posthog.session_recordings.session_recording_api.requests.get")
@patch("posthog.session_recordings.session_recording_api.object_storage.get_presigned_url")
@patch("posthog.session_recordings.session_recording_api.object_storage.list_objects")
def test_2023_08_01_version_stored_snapshots_can_be_loaded(
self, mock_list_objects: MagicMock, mock_get_presigned_url: MagicMock, mock_requests: MagicMock
) -> None:
session_id = str(uuid.uuid4())
lts_storage_path = "purposefully/not/what/we/would/calculate/to/prove/this/is/used"

def list_objects_func(path: str) -> List[str]:
# this mock simulates a recording whose blob storage has been deleted by TTL
# but which has been stored in LTS blob storage
if path == lts_storage_path:
return [
f"{lts_storage_path}/1-2",
f"{lts_storage_path}/3-4",
]
else:
return []

mock_list_objects.side_effect = list_objects_func
mock_get_presigned_url.return_value = "https://example.com"

mock_response = Mock()
mock_response.raise_for_status.return_value = None
mock_response.raw = "the file contents"

# Set up the mock to work as a context manager
mock_requests.return_value.__enter__.return_value = mock_response
mock_requests.return_value.__exit__.return_value = None

SessionRecording.objects.create(
team=self.team,
session_id=session_id,
storage_version="2023-08-01",
object_storage_path=lts_storage_path,
)

query_parameters = [
"source=blob",
"version=2",
"blob_key=1-2",
]
response = self.client.get(
f"/api/projects/{self.team.id}/session_recordings/{session_id}/snapshots?{'&'.join(query_parameters)}"
)
response_data = response.content.decode("utf-8")

assert mock_list_objects.call_args_list == []

assert mock_get_presigned_url.call_args_list == [
call(f"{lts_storage_path}/1-2", expiration=60),
]

assert response_data == "the file contents"
1 change: 0 additions & 1 deletion posthog/session_recordings/test/test_session_recordings.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,6 @@ def list_objects_func(path: str) -> List[str]:
]
}
assert mock_list_objects.call_args_list == [
call(f"session_recordings/team_id/{self.team.pk}/session_id/{session_id}/data"),
call("an lts stored object path"),
]

Expand Down
Loading