Skip to content

Commit

Permalink
Fix column order difference between creation and migration
Browse files: browse the repository at this point in the history
  • Loading branch information
robbie-c committed Aug 21, 2024
1 parent 112aa68 commit 21b7ef4
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 9 deletions.
27 changes: 27 additions & 0 deletions posthog/clickhouse/test/test_raw_sessions_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from posthog.clickhouse.client import sync_execute
from posthog.models.raw_sessions.sql import RAW_SESSION_TABLE_BACKFILL_SELECT_SQL
from posthog.models.utils import uuid7
from posthog.test.base import (
_create_event,
ClickhouseTestMixin,
BaseTest,
)


class TestRawSessionsModel(ClickhouseTestMixin, BaseTest):
    """Smoke tests for the SQL that populates the raw_sessions table."""

    def test_backfill_sql(self):
        """Run the backfill SELECT as an INSERT into raw_sessions and expect no error.

        One ``$pageview`` event carrying a uuid7 ``$session_id`` is created
        first, and the backfill is restricted to this test's team.
        """
        distinct_id = str(uuid7())
        session_id = str(uuid7())
        _create_event(
            team=self.team,
            event="$pageview",
            distinct_id=distinct_id,
            properties={"$current_url": "/", "$session_id": session_id},
            timestamp="2024-03-08",
        )

        # Join the fragments with explicit newlines rather than relying on the
        # SQL template to begin and end with whitespace. A newline (not a
        # space) is required: if the template's last line ends in a `--`
        # comment, a same-line "AND ..." would be swallowed by that comment
        # and the team filter silently dropped.
        backfill_sql = "\n".join(
            (
                "INSERT INTO raw_sessions",
                RAW_SESSION_TABLE_BACKFILL_SELECT_SQL(),
                "AND team_id = %(team_id)s",
            )
        )

        # just test that the backfill SQL can be run without error
        sync_execute(backfill_sql, {"team_id": self.team.id})
1 change: 1 addition & 0 deletions posthog/models/raw_sessions/migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
ADD COLUMN IF NOT EXISTS
page_screen_autocapture_uniq_up_to
AggregateFunction(uniqUpTo(1), Nullable(UUID))
AFTER maybe_has_session_replay
"""

BASE_RAW_SESSIONS_ADD_PAGEVIEW_AUTOCAPTURE_SCREEN_UP_TO_2_COLUMN_SQL = (
Expand Down
25 changes: 16 additions & 9 deletions posthog/models/raw_sessions/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,12 @@
autocapture_uniq AggregateFunction(uniq, Nullable(UUID)),
screen_count SimpleAggregateFunction(sum, Int64),
screen_uniq AggregateFunction(uniq, Nullable(UUID)),
-- as a performance optimisation, also keep track of the uniq events for all of these combined, a bounce is a session with <2 of these
page_screen_autocapture_uniq_up_to AggregateFunction(uniqUpTo(1), Nullable(UUID)),
-- replay
maybe_has_session_replay SimpleAggregateFunction(max, Bool) -- will be written False to by the events table mv and True to by the replay table mv
maybe_has_session_replay SimpleAggregateFunction(max, Bool), -- will be written False to by the events table mv and True to by the replay table mv
-- as a performance optimisation, also keep track of the uniq events for all of these combined, a bounce is a session with <2 of these
page_screen_autocapture_uniq_up_to AggregateFunction(uniqUpTo(1), Nullable(UUID))
) ENGINE = {engine}
"""

Expand Down Expand Up @@ -214,10 +215,12 @@ def source_int_column(column_name: str) -> str:
initializeAggregation('uniqState', if(event='autocapture', uuid, NULL)) as autocapture_uniq,
if(event='$screen', 1, 0) as screen_count,
initializeAggregation('uniqState', if(event='screen', uuid, NULL)) as screen_uniq,
initializeAggregation('uniqUpToState(1)', if(event='$pageview' OR event='$screen' OR event='$autocapture', uuid, NULL)) as page_screen_autocapture_uniq_up_to,
-- replay
false as maybe_has_session_replay
false as maybe_has_session_replay,
-- perf
initializeAggregation('uniqUpToState(1)', if(event='$pageview' OR event='$screen' OR event='$autocapture', uuid, NULL)) as page_screen_autocapture_uniq_up_to
FROM {database}.events
WHERE bitAnd(bitShiftRight(toUInt128(accurateCastOrNull(`$session_id`, 'UUID')), 76), 0xF) == 7 -- has a session id and is valid uuidv7
""".format(
Expand Down Expand Up @@ -321,10 +324,12 @@ def source_int_column(column_name: str) -> str:
uniqState(if(event='$autocapture', uuid, NULL)) as autocapture_uniq,
sumIf(1, event='$screen') as screen_count,
uniqState(if(event='$screen', uuid, NULL)) as screen_uniq,
uniqUpToState(1)(if(event='$pageview' OR event='$screen' OR event='$autocapture', uuid, NULL)) as page_screen_autocapture_uniq_up_to,
-- replay
false as maybe_has_session_replay
false as maybe_has_session_replay,
-- perf
uniqUpToState(1)(if(event='$pageview' OR event='$screen' OR event='$autocapture', uuid, NULL)) as page_screen_autocapture_uniq_up_to
FROM {database}.sharded_events
WHERE bitAnd(bitShiftRight(toUInt128(accurateCastOrNull(`$session_id`, 'UUID')), 76), 0xF) == 7 -- has a session id and is valid uuidv7)
GROUP BY
Expand Down Expand Up @@ -429,7 +434,7 @@ def source_int_column(column_name: str) -> str:
session_id_v7,
fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(session_id_v7, 80)), 1000)) as session_timestamp,
team_id,
any(distinct_id) as distinct_id,
argMaxMerge(distinct_id) as distinct_id,
min(min_timestamp) as min_timestamp,
max(max_timestamp) as max_timestamp,
Expand Down Expand Up @@ -483,7 +488,9 @@ def source_int_column(column_name: str) -> str:
sum(screen_count) as screen_count,
uniqMerge(screen_uniq) as screen_uniq,
max(maybe_has_session_replay) as maybe_has_session_replay
max(maybe_has_session_replay) as maybe_has_session_replay,
uniqUpToMerge(1)(page_screen_autocapture_uniq_up_to) as page_screen_autocapture_uniq_up_to
FROM {TABLE_BASE_NAME}
GROUP BY session_id_v7, team_id
"""
Expand Down

0 comments on commit 21b7ef4

Please sign in to comment.