Skip to content

Commit

Permalink
fix: correct materialized view definition for session replay events s…
Browse files Browse the repository at this point in the history
…napshot source (#19389)

* fix: correct materialized view definition for session replay events snapshot source

* Update query snapshots

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
pauldambra and github-actions[bot] authored Dec 18, 2023
1 parent 76f57ad commit 962756b
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 6 deletions.
15 changes: 15 additions & 0 deletions posthog/clickhouse/migrations/0051_session_replay_source_mv_fix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from posthog.clickhouse.client.migration_tools import run_sql_with_exceptions
from posthog.session_recordings.sql.session_replay_event_migrations_sql import (
DROP_SESSION_REPLAY_EVENTS_TABLE_MV_SQL,
)
from posthog.session_recordings.sql.session_replay_event_sql import (
SESSION_REPLAY_EVENTS_TABLE_MV_SQL,
)

operations = [
# we have to drop materialized view because 0050 created it incorrectly
run_sql_with_exceptions(DROP_SESSION_REPLAY_EVENTS_TABLE_MV_SQL()),
# now we can recreate it with explicit column definitions
# that correctly identifies snapshot source as LowCardinality(Nullable(String))
run_sql_with_exceptions(SESSION_REPLAY_EVENTS_TABLE_MV_SQL()),
]
14 changes: 13 additions & 1 deletion posthog/clickhouse/test/__snapshots__/test_schema.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -1495,7 +1495,19 @@
'

CREATE MATERIALIZED VIEW IF NOT EXISTS session_replay_events_mv ON CLUSTER 'posthog'
TO posthog_test.writable_session_replay_events
TO posthog_test.writable_session_replay_events (
`session_id` String, `team_id` Int64, `distinct_id` String,
`min_first_timestamp` DateTime64(6, 'UTC'),
`max_last_timestamp` DateTime64(6, 'UTC'),
`first_url` AggregateFunction(argMin, Nullable(String), DateTime64(6, 'UTC')),
`click_count` Int64, `keypress_count` Int64,
`mouse_activity_count` Int64, `active_milliseconds` Int64,
`console_log_count` Int64, `console_warn_count` Int64,
`console_error_count` Int64, `size` Int64, `message_count` Int64,
`event_count` Int64,
`snapshot_source` AggregateFunction(argMin, LowCardinality(Nullable(String)), DateTime64(6, 'UTC')),
`_timestamp` Nullable(DateTime)
)
AS SELECT
session_id,
team_id,
Expand Down
24 changes: 19 additions & 5 deletions posthog/session_recordings/sql/session_replay_event_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
) ENGINE = {engine}
"""

# if updating these column definitions
# you'll need to update the explicit column definitions in the materialized view creation statement below
SESSION_REPLAY_EVENTS_TABLE_BASE_SQL = """
CREATE TABLE IF NOT EXISTS {table_name} ON CLUSTER '{cluster}'
(
Expand Down Expand Up @@ -107,11 +109,10 @@
engine=kafka_engine(topic=KAFKA_CLICKHOUSE_SESSION_REPLAY_EVENTS),
)


SESSION_REPLAY_EVENTS_TABLE_MV_SQL = (
lambda: """
CREATE MATERIALIZED VIEW IF NOT EXISTS session_replay_events_mv ON CLUSTER '{cluster}'
TO {database}.{target_table}
TO {database}.{target_table} {explictly_specify_columns}
AS SELECT
session_id,
team_id,
Expand Down Expand Up @@ -147,10 +148,25 @@
target_table="writable_session_replay_events",
cluster=settings.CLICKHOUSE_CLUSTER,
database=settings.CLICKHOUSE_DATABASE,
# ClickHouse is incorrectly expanding the type of the snapshot source column
# Despite it being a LowCardinality(Nullable(String)) in writable_session_replay_events
# The column expansion picks only Nullable(String) and so we can't select it
explictly_specify_columns="""(
`session_id` String, `team_id` Int64, `distinct_id` String,
`min_first_timestamp` DateTime64(6, 'UTC'),
`max_last_timestamp` DateTime64(6, 'UTC'),
`first_url` AggregateFunction(argMin, Nullable(String), DateTime64(6, 'UTC')),
`click_count` Int64, `keypress_count` Int64,
`mouse_activity_count` Int64, `active_milliseconds` Int64,
`console_log_count` Int64, `console_warn_count` Int64,
`console_error_count` Int64, `size` Int64, `message_count` Int64,
`event_count` Int64,
`snapshot_source` AggregateFunction(argMin, LowCardinality(Nullable(String)), DateTime64(6, 'UTC')),
`_timestamp` Nullable(DateTime)
)""",
)
)


# Distributed engine tables are only created if CLICKHOUSE_REPLICATED

# This table is responsible for writing to sharded_session_replay_events based on a sharding key.
Expand All @@ -163,7 +179,6 @@
),
)


# This table is responsible for reading from session_replay_events on a cluster setting
DISTRIBUTED_SESSION_REPLAY_EVENTS_TABLE_SQL = lambda: SESSION_REPLAY_EVENTS_TABLE_BASE_SQL.format(
table_name="session_replay_events",
Expand All @@ -174,7 +189,6 @@
),
)


DROP_SESSION_REPLAY_EVENTS_TABLE_SQL = lambda: (
f"DROP TABLE IF EXISTS {SESSION_REPLAY_EVENTS_DATA_TABLE()} ON CLUSTER '{settings.CLICKHOUSE_CLUSTER}'"
)
Expand Down

0 comments on commit 962756b

Please sign in to comment.