Commit c69487f

pauldambra committed Sep 21, 2023
1 parent b2ce960
Showing 3 changed files with 2 additions and 84 deletions.
10 changes: 2 additions & 8 deletions posthog/api/capture.py
@@ -35,7 +35,6 @@
 from posthog.metrics import LABEL_RESOURCE_TYPE
 from posthog.models.utils import UUIDT
 from posthog.session_recordings.session_recording_helpers import (
-    legacy_preprocess_session_recording_events_for_clickhouse,
     preprocess_replay_events_for_blob_ingestion,
     split_replay_events,
 )
@@ -367,14 +366,9 @@ def get_event(request):
         capture_exception(e)
 
     try:
         # split the replay events off as they are passed to kafka separately
         replay_events, other_events = split_replay_events(events)
-        processed_replay_events = replay_events
-
-        if len(replay_events) > 0:
-            # Legacy solution stays in place
-            processed_replay_events = legacy_preprocess_session_recording_events_for_clickhouse(replay_events)
-
-        events = processed_replay_events + other_events
+
+        events = other_events
 
     except ValueError as e:
         return cors_response(
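For orientation, the surviving capture path now reduces to roughly the following — a minimal sketch, not the verbatim get_event body. The preprocess_replay_events_for_blob_ingestion call is an assumption based on the import this commit keeps:

    # Sketch of the post-change flow (assumed shape, not the exact source):
    # replay events are split off and routed to blob ingestion only, while
    # the remaining analytics events continue through the normal pipeline.
    replay_events, other_events = split_replay_events(events)

    if len(replay_events) > 0:
        # The legacy ClickHouse chunking is gone; blob ingestion is now the
        # only preprocessing step applied to replay events.
        replay_events = preprocess_replay_events_for_blob_ingestion(replay_events)

    events = other_events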
39 changes: 0 additions & 39 deletions posthog/session_recordings/session_recording_helpers.py
@@ -8,7 +8,6 @@
 from dateutil.parser import ParserError, parse
 from sentry_sdk.api import capture_exception
 
-from posthog.models import utils
 from posthog.session_recordings.models.metadata import (
     DecompressedRecordingData,
     SessionRecordingEventSummary,
@@ -88,44 +87,6 @@ class RRWEB_MAP_EVENT_DATA_TYPE:
 Event = Dict[str, Any]
 
 
-def legacy_preprocess_session_recording_events_for_clickhouse(
-    events: List[Event], chunk_size=512 * 1024
-) -> List[Event]:
-    return _process_windowed_events(events, lambda x: legacy_compress_and_chunk_snapshots(x, chunk_size=chunk_size))
-
-
-def legacy_compress_and_chunk_snapshots(events: List[Event], chunk_size=512 * 1024) -> Generator[Event, None, None]:
-    data_list = list(flatten([event["properties"]["$snapshot_data"] for event in events], max_depth=1))
-    session_id = events[0]["properties"]["$session_id"]
-    window_id = events[0]["properties"].get("$window_id")
-    has_full_snapshot = any(snapshot_data["type"] == RRWEB_MAP_EVENT_TYPE.FullSnapshot for snapshot_data in data_list)
-    compressed_data = compress_to_string(json.dumps(data_list))
-
-    id = str(utils.UUIDT())
-    chunks = chunk_string(compressed_data, chunk_size)
-
-    for index, chunk in enumerate(chunks):
-        yield {
-            **events[0],
-            "properties": {
-                **events[0]["properties"],
-                "$session_id": session_id,
-                "$window_id": window_id,
-                # If it is the first chunk we include all events
-                "$snapshot_data": {
-                    "chunk_id": id,
-                    "chunk_index": index,
-                    "chunk_count": len(chunks),
-                    "data": chunk,
-                    "compression": "gzip-base64",
-                    "has_full_snapshot": has_full_snapshot,
-                    # We only store this field on the first chunk as it contains all events, not just this chunk
-                    "events_summary": get_events_summary_from_snapshot_data(data_list) if index == 0 else None,
-                },
-            },
-        }
-
-
 def split_replay_events(events: List[Event]) -> Tuple[List[Event], List[Event]]:
     replay, other = [], []
 
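The removed chunker relies on two helpers this diff does not show: compress_to_string (assumed to gzip the JSON string and base64-encode the result, matching the "gzip-base64" marker above) and chunk_string (assumed to slice a string into fixed-size pieces). Under those assumptions, a minimal sketch of the reader side — how a consumer could reassemble one recording from its chunk events:

    import base64
    import gzip
    import json

    def reassemble_chunked_snapshot(chunk_events):
        # Hypothetical helper: collect the "$snapshot_data" payloads emitted
        # by the removed legacy_compress_and_chunk_snapshots, order them by
        # chunk_index, then undo the gzip-base64 encoding to recover the
        # original rrweb event list.
        chunks = sorted(
            (event["properties"]["$snapshot_data"] for event in chunk_events),
            key=lambda chunk: chunk["chunk_index"],
        )
        if len(chunks) != chunks[0]["chunk_count"]:
            raise ValueError("incomplete recording: missing chunks")
        payload = "".join(chunk["data"] for chunk in chunks)
        return json.loads(gzip.decompress(base64.b64decode(payload)))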
37 changes: 0 additions & 37 deletions posthog/session_recordings/test/test_factory.py
@@ -12,7 +12,6 @@
 from posthog.session_recordings.queries.test.session_replay_sql import produce_replay_summary
 from posthog.session_recordings.session_recording_helpers import (
     RRWEB_MAP_EVENT_TYPE,
-    legacy_preprocess_session_recording_events_for_clickhouse,
 )
 from posthog.utils import cast_timestamp_or_now

@@ -73,42 +72,6 @@ def create_session_recording_events(
         last_timestamp=timestamp,
     )
 
-    if use_recording_table:
-        if window_id is None:
-            window_id = session_id
-
-        if not snapshots:
-            snapshots = [
-                {
-                    "type": RRWEB_MAP_EVENT_TYPE.FullSnapshot,
-                    "data": {},
-                    "timestamp": round(timestamp.timestamp() * 1000),  # NOTE: rrweb timestamps are milliseconds
-                }
-            ]
-
-        # We use the same code path for chunking events by mocking this as a typical posthog event
-        mock_events = [
-            {
-                "event": "$snapshot",
-                "properties": {
-                    "$session_id": session_id,
-                    "$window_id": window_id,
-                    "$snapshot_data": snapshot,
-                },
-            }
-            for snapshot in snapshots
-        ]
-
-        for event in legacy_preprocess_session_recording_events_for_clickhouse(mock_events, chunk_size=chunk_size):
-            _insert_session_recording_event(
-                team_id=team_id,
-                distinct_id=distinct_id,
-                session_id=session_id,
-                window_id=window_id,
-                timestamp=timestamp,
-                snapshot_data=event["properties"]["$snapshot_data"],
-            )
-
 
 # Pre-compression and events_summary additions which potentially existed for some self-hosted instances
 def create_uncompressed_session_recording_event(
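After this commit the test factory writes recordings only through the replay summary path. A minimal sketch of the remaining usage, with keyword arguments inferred from the surviving call above (treat the exact names as assumptions):

    # Assumed usage of the only remaining write path in the test factory;
    # produce_replay_summary is the import this commit keeps.
    produce_replay_summary(
        team_id=team_id,
        session_id=session_id,
        distinct_id=distinct_id,
        first_timestamp=timestamp,
        last_timestamp=timestamp,
    )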
