Skip to content

Commit

Permalink
Fix column order difference between creation and migration
Browse files Browse the repository at this point in the history
  • Loading branch information
robbie-c committed Aug 21, 2024
1 parent 112aa68 commit fd74671
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 9 deletions.
42 changes: 42 additions & 0 deletions posthog/clickhouse/test/test_raw_sessions_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from posthog.clickhouse.client import sync_execute
from posthog.models.raw_sessions.sql import RAW_SESSION_TABLE_BACKFILL_SELECT_SQL
from posthog.models.utils import uuid7
from posthog.test.base import (
_create_event,
ClickhouseTestMixin,
BaseTest,
)

# Module-level counters incremented by create_distinct_id() and
# create_session_id() below, so each call in this module gets a fresh value.
distinct_id_counter = 0
session_id_counter = 0


def create_distinct_id():
    """Return a new distinct id of the form ``d<N>``.

    ``N`` is the module-level ``distinct_id_counter`` after it has been
    incremented by one, so successive calls yield ``d1``, ``d2``, ...
    """
    global distinct_id_counter
    next_value = distinct_id_counter + 1
    distinct_id_counter = next_value
    return "d" + str(next_value)


def create_session_id():
    """Return a new session id as the string form of a ``uuid7`` value.

    The module-level ``session_id_counter`` is incremented first and then
    passed as the ``random`` argument of ``uuid7``.
    """
    global session_id_counter
    session_id_counter = session_id_counter + 1
    # NOTE(review): presumably seeding `random` with the counter makes the
    # generated ids reproducible between runs - confirm against uuid7.
    session_uuid = uuid7(random=session_id_counter)
    return str(session_uuid)


class TestRawSessionsModel(ClickhouseTestMixin, BaseTest):
    """Smoke tests for the raw sessions SQL definitions."""

    def test_backfill_sql(self):
        """Check that the backfill SELECT can feed an INSERT into raw_sessions.

        A single ``$pageview`` event carrying a session id is created for the
        test team, then the backfill statement is executed restricted to that
        team. The test passes as long as the statement runs without raising.
        """
        distinct_id = create_distinct_id()
        session_id = create_session_id()
        _create_event(
            team=self.team,
            event="$pageview",
            distinct_id=distinct_id,
            properties={"$current_url": "/", "$session_id": session_id},
            timestamp="2024-03-08",
        )

        # Join the fragments with explicit newlines instead of bare
        # concatenation: the SELECT template ends in a `--` line comment, so
        # the team filter must start on its own line to avoid being swallowed
        # by that comment if the template's surrounding whitespace changes.
        backfill_query = "\n".join(
            (
                "INSERT INTO raw_sessions",
                RAW_SESSION_TABLE_BACKFILL_SELECT_SQL(),
                "AND team_id = %(team_id)s",
            )
        )

        # just test that the backfill SQL can be run without error
        sync_execute(backfill_query, {"team_id": self.team.id})
2 changes: 2 additions & 0 deletions posthog/models/raw_sessions/migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

from posthog.models.raw_sessions.sql import RAW_SESSIONS_DATA_TABLE, TABLE_BASE_NAME

# perf: ALTER TABLE template that adds the page_screen_autocapture_uniq_up_to
# aggregate-state column (uniqUpTo(1) over Nullable(UUID)) if it is missing.
# The AFTER clause places the new column directly after
# maybe_has_session_replay.
# NOTE(review): {table_name} and {cluster} look like str.format placeholders
# filled in by the statements built from this template - confirm at call sites.
ADD_PAGEVIEW_AUTOCAPTURE_SCREEN_UP_TO_2_COLUMN_SQL = """
ALTER TABLE {table_name} on CLUSTER '{cluster}'
ADD COLUMN IF NOT EXISTS
page_screen_autocapture_uniq_up_to
AggregateFunction(uniqUpTo(1), Nullable(UUID))
AFTER maybe_has_session_replay
"""

BASE_RAW_SESSIONS_ADD_PAGEVIEW_AUTOCAPTURE_SCREEN_UP_TO_2_COLUMN_SQL = (
Expand Down
30 changes: 21 additions & 9 deletions posthog/models/raw_sessions/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,12 @@
autocapture_uniq AggregateFunction(uniq, Nullable(UUID)),
screen_count SimpleAggregateFunction(sum, Int64),
screen_uniq AggregateFunction(uniq, Nullable(UUID)),
-- as a performance optimisation, also keep track of the uniq events for all of these combined, a bounce is a session with <2 of these
page_screen_autocapture_uniq_up_to AggregateFunction(uniqUpTo(1), Nullable(UUID)),
-- replay
maybe_has_session_replay SimpleAggregateFunction(max, Bool) -- will be written False to by the events table mv and True to by the replay table mv
maybe_has_session_replay SimpleAggregateFunction(max, Bool), -- will be written False to by the events table mv and True to by the replay table mv
-- as a performance optimisation, also keep track of the uniq events for all of these combined, a bounce is a session with <2 of these
page_screen_autocapture_uniq_up_to AggregateFunction(uniqUpTo(1), Nullable(UUID))
) ENGINE = {engine}
"""

Expand Down Expand Up @@ -153,6 +154,11 @@ def source_int_column(column_name: str) -> str:
return f"JSONExtractInt(properties, '{column_name}')"


def source_nullable_float_column(column_name: str) -> str:
    """Build the SQL expression that reads an event property as a float.

    The returned expression takes the raw JSON value of ``column_name`` from
    ``properties``, wraps it in ``nullIf`` for both the empty string and the
    literal ``null``, removes a leading/trailing double quote via
    ``replaceRegexpAll`` and finally applies ``accurateCastOrNull`` with
    ``'Float64'``.

    Args:
        column_name: Property key, interpolated as-is into the SQL text.

    Returns:
        The SQL expression as a string.
    """
    # this mirrors what the queries do, awkward as it is
    raw_json = f"JSONExtractRaw(properties, '{column_name}')"
    without_blanks = f"nullIf(nullIf({raw_json}, ''), 'null')"
    unquoted = f"""replaceRegexpAll({without_blanks}, '^"|"$', '')"""
    return f"accurateCastOrNull({unquoted}, 'Float64')"


RAW_SESSION_TABLE_BACKFILL_SELECT_SQL = (
lambda: """
SELECT
Expand Down Expand Up @@ -214,10 +220,12 @@ def source_int_column(column_name: str) -> str:
initializeAggregation('uniqState', if(event='autocapture', uuid, NULL)) as autocapture_uniq,
if(event='$screen', 1, 0) as screen_count,
initializeAggregation('uniqState', if(event='screen', uuid, NULL)) as screen_uniq,
initializeAggregation('uniqUpToState(1)', if(event='$pageview' OR event='$screen' OR event='$autocapture', uuid, NULL)) as page_screen_autocapture_uniq_up_to,
-- replay
false as maybe_has_session_replay
false as maybe_has_session_replay,
-- perf
initializeAggregation('uniqUpToState(1)', if(event='$pageview' OR event='$screen' OR event='$autocapture', uuid, NULL)) as page_screen_autocapture_uniq_up_to
FROM {database}.events
WHERE bitAnd(bitShiftRight(toUInt128(accurateCastOrNull(`$session_id`, 'UUID')), 76), 0xF) == 7 -- has a session id and is valid uuidv7
""".format(
Expand Down Expand Up @@ -321,10 +329,12 @@ def source_int_column(column_name: str) -> str:
uniqState(if(event='$autocapture', uuid, NULL)) as autocapture_uniq,
sumIf(1, event='$screen') as screen_count,
uniqState(if(event='$screen', uuid, NULL)) as screen_uniq,
uniqUpToState(1)(if(event='$pageview' OR event='$screen' OR event='$autocapture', uuid, NULL)) as page_screen_autocapture_uniq_up_to,
-- replay
false as maybe_has_session_replay
false as maybe_has_session_replay,
-- perf
uniqUpToState(1)(if(event='$pageview' OR event='$screen' OR event='$autocapture', uuid, NULL)) as page_screen_autocapture_uniq_up_to
FROM {database}.sharded_events
WHERE bitAnd(bitShiftRight(toUInt128(accurateCastOrNull(`$session_id`, 'UUID')), 76), 0xF) == 7 -- has a session id and is valid uuidv7)
GROUP BY
Expand Down Expand Up @@ -429,7 +439,7 @@ def source_int_column(column_name: str) -> str:
session_id_v7,
fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(session_id_v7, 80)), 1000)) as session_timestamp,
team_id,
any(distinct_id) as distinct_id,
argMaxMerge(distinct_id) as distinct_id,
min(min_timestamp) as min_timestamp,
max(max_timestamp) as max_timestamp,
Expand Down Expand Up @@ -483,7 +493,9 @@ def source_int_column(column_name: str) -> str:
sum(screen_count) as screen_count,
uniqMerge(screen_uniq) as screen_uniq,
max(maybe_has_session_replay) as maybe_has_session_replay
max(maybe_has_session_replay) as maybe_has_session_replay,
uniqUpToMerge(1)(page_screen_autocapture_uniq_up_to) as page_screen_autocapture_uniq_up_to
FROM {TABLE_BASE_NAME}
GROUP BY session_id_v7, team_id
"""
Expand Down

0 comments on commit fd74671

Please sign in to comment.