diff --git a/posthog/api/capture.py b/posthog/api/capture.py
index 8b6a576012d9b..ba07e55f34900 100644
--- a/posthog/api/capture.py
+++ b/posthog/api/capture.py
@@ -35,7 +35,6 @@
 from posthog.metrics import LABEL_RESOURCE_TYPE
 from posthog.models.utils import UUIDT
 from posthog.session_recordings.session_recording_helpers import (
-    legacy_preprocess_session_recording_events_for_clickhouse,
     preprocess_replay_events_for_blob_ingestion,
     split_replay_events,
 )
@@ -367,14 +366,9 @@ def get_event(request):
         capture_exception(e)
 
     try:
+        # split the replay events off as they are passed to kafka separately
         replay_events, other_events = split_replay_events(events)
-        processed_replay_events = replay_events
-
-        if len(replay_events) > 0:
-            # Legacy solution stays in place
-            processed_replay_events = legacy_preprocess_session_recording_events_for_clickhouse(replay_events)
-
-        events = processed_replay_events + other_events
+        events = other_events
 
     except ValueError as e:
         return cors_response(
diff --git a/posthog/session_recordings/session_recording_helpers.py b/posthog/session_recordings/session_recording_helpers.py
index 960ac0021c817..8c7f1964a7d51 100644
--- a/posthog/session_recordings/session_recording_helpers.py
+++ b/posthog/session_recordings/session_recording_helpers.py
@@ -8,7 +8,6 @@
 from dateutil.parser import ParserError, parse
 from sentry_sdk.api import capture_exception
 
-from posthog.models import utils
 from posthog.session_recordings.models.metadata import (
     DecompressedRecordingData,
     SessionRecordingEventSummary,
@@ -88,44 +87,6 @@ class RRWEB_MAP_EVENT_DATA_TYPE:
 Event = Dict[str, Any]
 
 
-def legacy_preprocess_session_recording_events_for_clickhouse(
-    events: List[Event], chunk_size=512 * 1024
-) -> List[Event]:
-    return _process_windowed_events(events, lambda x: legacy_compress_and_chunk_snapshots(x, chunk_size=chunk_size))
-
-
-def legacy_compress_and_chunk_snapshots(events: List[Event], chunk_size=512 * 1024) -> Generator[Event, None, None]:
-    data_list = list(flatten([event["properties"]["$snapshot_data"] for event in events], max_depth=1))
-    session_id = events[0]["properties"]["$session_id"]
-    window_id = events[0]["properties"].get("$window_id")
-    has_full_snapshot = any(snapshot_data["type"] == RRWEB_MAP_EVENT_TYPE.FullSnapshot for snapshot_data in data_list)
-    compressed_data = compress_to_string(json.dumps(data_list))
-
-    id = str(utils.UUIDT())
-    chunks = chunk_string(compressed_data, chunk_size)
-
-    for index, chunk in enumerate(chunks):
-        yield {
-            **events[0],
-            "properties": {
-                **events[0]["properties"],
-                "$session_id": session_id,
-                "$window_id": window_id,
-                # If it is the first chunk we include all events
-                "$snapshot_data": {
-                    "chunk_id": id,
-                    "chunk_index": index,
-                    "chunk_count": len(chunks),
-                    "data": chunk,
-                    "compression": "gzip-base64",
-                    "has_full_snapshot": has_full_snapshot,
-                    # We only store this field on the first chunk as it contains all events, not just this chunk
-                    "events_summary": get_events_summary_from_snapshot_data(data_list) if index == 0 else None,
-                },
-            },
-        }
-
-
 def split_replay_events(events: List[Event]) -> Tuple[List[Event], List[Event]]:
     replay, other = [], []
 
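For context on what the deleted helpers produced (and what legacy `session_recording_events` rows written by them still look like), here is a minimal, runnable sketch of the gzip-base64 chunk scheme. The `compress_to_string` / `chunk_string` bodies below are stdlib-based assumptions inferred from the `"compression": "gzip-base64"` marker in the removed code, not the actual PostHog helpers:

```python
import base64
import gzip
import json
import uuid
from typing import Any, Dict, List


def compress_to_string(value: str) -> str:
    # gzip the JSON payload, then base64 it so it can be stored/transported as text
    return base64.b64encode(gzip.compress(value.encode("utf-8"))).decode("utf-8")


def chunk_string(value: str, chunk_size: int) -> List[str]:
    # slice the encoded payload into pieces of at most chunk_size characters
    return [value[i : i + chunk_size] for i in range(0, len(value), chunk_size)]


def chunk_snapshots(data_list: List[Dict[str, Any]], chunk_size: int = 512 * 1024) -> List[Dict[str, Any]]:
    compressed = compress_to_string(json.dumps(data_list))
    chunks = chunk_string(compressed, chunk_size)
    chunk_id = str(uuid.uuid4())  # the removed code used posthog.models.utils.UUIDT
    return [
        {
            "chunk_id": chunk_id,
            "chunk_index": index,
            "chunk_count": len(chunks),
            "data": chunk,
            "compression": "gzip-base64",
        }
        for index, chunk in enumerate(chunks)
    ]


# Round trip: reassemble chunks in index order, base64-decode, gunzip, parse.
chunks = chunk_snapshots([{"type": 2, "data": {}}], chunk_size=16)
payload = "".join(c["data"] for c in sorted(chunks, key=lambda c: c["chunk_index"]))
assert json.loads(gzip.decompress(base64.b64decode(payload))) == [{"type": 2, "data": {}}]
```

The round trip at the end is why `chunk_id`/`chunk_index`/`chunk_count` existed at all: readers had to gather every row sharing a `chunk_id`, concatenate in index order, then decompress.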
diff --git a/posthog/session_recordings/test/test_factory.py b/posthog/session_recordings/test/test_factory.py
index 4213ff02f5566..195286683d0de 100644
--- a/posthog/session_recordings/test/test_factory.py
+++ b/posthog/session_recordings/test/test_factory.py
@@ -12,7 +12,6 @@
 from posthog.session_recordings.queries.test.session_replay_sql import produce_replay_summary
 from posthog.session_recordings.session_recording_helpers import (
     RRWEB_MAP_EVENT_TYPE,
-    legacy_preprocess_session_recording_events_for_clickhouse,
 )
 from posthog.utils import cast_timestamp_or_now
 
@@ -73,42 +72,6 @@
             last_timestamp=timestamp,
         )
 
-    if use_recording_table:
-        if window_id is None:
-            window_id = session_id
-
-        if not snapshots:
-            snapshots = [
-                {
-                    "type": RRWEB_MAP_EVENT_TYPE.FullSnapshot,
-                    "data": {},
-                    "timestamp": round(timestamp.timestamp() * 1000),  # NOTE: rrweb timestamps are milliseconds
-                }
-            ]
-
-        # We use the same code path for chunking events by mocking this as an typical posthog event
-        mock_events = [
-            {
-                "event": "$snapshot",
-                "properties": {
-                    "$session_id": session_id,
-                    "$window_id": window_id,
-                    "$snapshot_data": snapshot,
-                },
-            }
-            for snapshot in snapshots
-        ]
-
-        for event in legacy_preprocess_session_recording_events_for_clickhouse(mock_events, chunk_size=chunk_size):
-            _insert_session_recording_event(
-                team_id=team_id,
-                distinct_id=distinct_id,
-                session_id=session_id,
-                window_id=window_id,
-                timestamp=timestamp,
-                snapshot_data=event["properties"]["$snapshot_data"],
-            )
-
 
 # Pre-compression and events_summary additions which potentially existed for some self-hosted instances
 def create_uncompressed_session_recording_event(
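With the legacy ClickHouse path gone, the capture endpoint leans entirely on `split_replay_events`: `$snapshot` events are peeled off for the separate Kafka/blob-ingestion path and never rejoin `events`. A rough sketch of that split, with a bare event-name check standing in for whatever predicate the real helper applies:

```python
from typing import Any, Dict, List, Tuple

Event = Dict[str, Any]


def split_replay_events(events: List[Event]) -> Tuple[List[Event], List[Event]]:
    replay, other = [], []
    for event in events:
        # the real predicate lives in session_recording_helpers.py; an
        # event-name check is used here purely for illustration
        (replay if event.get("event") == "$snapshot" else other).append(event)
    return replay, other


# Mirrors the new capture.py flow: replay events head off for blob ingestion,
# and only the remaining events continue down this code path.
replay_events, other_events = split_replay_events(
    [
        {"event": "$snapshot", "properties": {"$session_id": "s1", "$snapshot_data": {"type": 2}}},
        {"event": "$pageview", "properties": {}},
    ]
)
events = other_events
```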