Skip to content

Commit

Permalink
Update query snapshots
Browse files Browse the repository at this point in the history
  • Loading branch information
github-actions[bot] authored and robbie-c committed Jun 19, 2024
1 parent c4cf3c6 commit 4cd1338
Showing 1 changed file with 301 additions and 0 deletions.
301 changes: 301 additions & 0 deletions posthog/clickhouse/test/__snapshots__/test_schema.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -1595,6 +1595,118 @@
_offset
FROM posthog_test.kafka_plugin_log_entries

'''
# ---
# name: test_create_table_query[raw_sessions]
'''

-- Distributed table over cluster 'posthog' that forwards to posthog_test.sharded_raw_sessions,
-- using cityHash64(session_id_v7) as the sharding key.
CREATE TABLE IF NOT EXISTS raw_sessions ON CLUSTER 'posthog'
(
    -- Session identity.
    team_id Int64,
    session_id_v7 UInt128, -- the session's uuidv7, held as a 128-bit integer

    -- argMax state keyed by a DateTime64 value, so the most recent distinct_id of the
    -- session is the one ClickHouse keeps. A distinct_id that changes partway through a
    -- session is considered acceptable here.
    distinct_id AggregateFunction(argMax, String, DateTime64(6, 'UTC')),

    -- Smallest and largest timestamp aggregated for the session.
    min_timestamp SimpleAggregateFunction(min, DateTime64(6, 'UTC')),
    max_timestamp SimpleAggregateFunction(max, DateTime64(6, 'UTC')),

    -- URLs: the de-duplicated set, plus the ones with the smallest / largest DateTime64 key.
    urls SimpleAggregateFunction(groupUniqArrayArray, Array(String)),
    entry_url AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    end_url AggregateFunction(argMax, String, DateTime64(6, 'UTC')),

    -- Attribution values. Each one is an argMin state, i.e. the value that carries the
    -- smallest DateTime64 key.
    initial_referring_domain AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_source AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_campaign AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_medium AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_term AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_content AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gad_source AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gclsrc AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_dclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gbraid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_wbraid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_fbclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_msclkid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_twclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_li_fat_id AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_mc_cid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_igshid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_ttclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),

    -- Totals of pageview, autocapture and screen events.
    -- The *_count sums are not idempotent: a bug on EU double-counted events and produced
    -- wrong totals, so it is unclear whether they can be relied upon. The *_uniq columns
    -- therefore also track the unique event uuids. They are slower and cost more storage,
    -- but stay correct when events are double-counted, which makes them usable for verifying
    -- the counts and as a backup. Ideally the *_uniq columns can be dropped once the counts
    -- are trusted.
    pageview_count SimpleAggregateFunction(sum, Int64),
    pageview_uniq AggregateFunction(uniq, Nullable(UUID)),
    autocapture_count SimpleAggregateFunction(sum, Int64),
    autocapture_uniq AggregateFunction(uniq, Nullable(UUID)),
    screen_count SimpleAggregateFunction(sum, Int64),
    screen_uniq AggregateFunction(uniq, Nullable(UUID)),

    has_session_replay SimpleAggregateFunction(max, Bool) -- no code path writes True into this column yet
) ENGINE = Distributed('posthog', 'posthog_test', 'sharded_raw_sessions', cityHash64(session_id_v7))

'''
# ---
# name: test_create_table_query[raw_sessions_mv]
'''

-- Materialized view that rolls rows from posthog_test.sharded_events up into one set of
-- aggregate values per (session_id_v7, team_id) and writes them to
-- posthog_test.writable_raw_sessions.
CREATE MATERIALIZED VIEW IF NOT EXISTS raw_sessions_mv ON CLUSTER 'posthog'
TO posthog_test.writable_raw_sessions
AS

SELECT
    team_id,
    -- The WHERE clause below only lets through rows whose $session_id casts to a UUID.
    toUInt128(toUUID(`$session_id`)) AS session_id_v7,

    -- distinct_id paired with the greatest timestamp, i.e. the latest one seen.
    argMaxState(distinct_id, timestamp) AS distinct_id,

    -- min_timestamp, max_timestamp, urls, the *_count columns and has_session_replay are
    -- declared as SimpleAggregateFunction in writable_raw_sessions, so plain aggregate
    -- values are emitted for them instead of -State values.
    min(timestamp) AS min_timestamp,
    max(timestamp) AS max_timestamp,

    -- nullIf turns an empty $current_url into NULL so that it is not collected into urls.
    groupUniqArray(nullIf(JSONExtractString(properties, '$current_url'), '')) AS urls,
    argMinState(JSONExtractString(properties, '$current_url'), timestamp) AS entry_url,
    argMaxState(JSONExtractString(properties, '$current_url'), timestamp) AS end_url,

    -- Attribution properties: keep the value carried by the event with the smallest timestamp.
    -- Every property key must match the suffix of the column it feeds.
    argMinState(JSONExtractString(properties, '$referring_domain'), timestamp) AS initial_referring_domain,
    argMinState(JSONExtractString(properties, 'utm_source'), timestamp) AS initial_utm_source,
    argMinState(JSONExtractString(properties, 'utm_campaign'), timestamp) AS initial_utm_campaign,
    argMinState(JSONExtractString(properties, 'utm_medium'), timestamp) AS initial_utm_medium,
    argMinState(JSONExtractString(properties, 'utm_term'), timestamp) AS initial_utm_term,
    argMinState(JSONExtractString(properties, 'utm_content'), timestamp) AS initial_utm_content,
    argMinState(JSONExtractString(properties, 'gclid'), timestamp) AS initial_gclid,
    argMinState(JSONExtractString(properties, 'gad_source'), timestamp) AS initial_gad_source,
    argMinState(JSONExtractString(properties, 'gclsrc'), timestamp) AS initial_gclsrc,
    argMinState(JSONExtractString(properties, 'dclid'), timestamp) AS initial_dclid,
    argMinState(JSONExtractString(properties, 'gbraid'), timestamp) AS initial_gbraid,
    argMinState(JSONExtractString(properties, 'wbraid'), timestamp) AS initial_wbraid,
    argMinState(JSONExtractString(properties, 'fbclid'), timestamp) AS initial_fbclid,
    argMinState(JSONExtractString(properties, 'msclkid'), timestamp) AS initial_msclkid,
    argMinState(JSONExtractString(properties, 'twclid'), timestamp) AS initial_twclid,
    -- LinkedIn's click id property is 'li_fat_id' (previously misspelled 'la_fat_id' here).
    argMinState(JSONExtractString(properties, 'li_fat_id'), timestamp) AS initial_li_fat_id,
    argMinState(JSONExtractString(properties, 'mc_cid'), timestamp) AS initial_mc_cid,
    argMinState(JSONExtractString(properties, 'igshid'), timestamp) AS initial_igshid,
    argMinState(JSONExtractString(properties, 'ttclid'), timestamp) AS initial_ttclid,

    -- Per event type: a running sum, plus a uniq state over the uuids of matching events
    -- (non-matching events contribute NULL).
    sumIf(1, event='$pageview') AS pageview_count,
    uniqState(if(event='$pageview', uuid, NULL)) AS pageview_uniq,
    sumIf(1, event='$autocapture') AS autocapture_count,
    uniqState(if(event='$autocapture', uuid, NULL)) AS autocapture_uniq,
    sumIf(1, event='$screen') AS screen_count,
    uniqState(if(event='$screen', uuid, NULL)) AS screen_uniq,

    -- This view always writes false for has_session_replay.
    false AS has_session_replay
FROM posthog_test.sharded_events
-- Bits 76..79 of the 128-bit id hold the UUID version; require version 7. A $session_id
-- that is missing or not a UUID makes accurateCastOrNull return NULL, so the row is dropped.
WHERE bitAnd(bitShiftRight(toUInt128(accurateCastOrNull(`$session_id`, 'UUID')), 76), 0xF) == 7 -- has a session id and is valid uuidv7
GROUP BY session_id_v7, team_id


'''
# ---
# name: test_create_table_query[session_recording_events]
Expand Down Expand Up @@ -2048,6 +2160,72 @@



'''
# ---
# name: test_create_table_query[sharded_raw_sessions]
'''

-- Per-shard storage table for raw session aggregates (ReplicatedAggregatingMergeTree).
CREATE TABLE IF NOT EXISTS sharded_raw_sessions ON CLUSTER 'posthog'
(
    -- Session identity.
    team_id Int64,
    session_id_v7 UInt128, -- the session's uuidv7, held as a 128-bit integer

    -- argMax state keyed by a DateTime64 value, so the most recent distinct_id of the
    -- session is the one ClickHouse keeps. A distinct_id that changes partway through a
    -- session is considered acceptable here.
    distinct_id AggregateFunction(argMax, String, DateTime64(6, 'UTC')),

    -- Smallest and largest timestamp aggregated for the session.
    min_timestamp SimpleAggregateFunction(min, DateTime64(6, 'UTC')),
    max_timestamp SimpleAggregateFunction(max, DateTime64(6, 'UTC')),

    -- URLs: the de-duplicated set, plus the ones with the smallest / largest DateTime64 key.
    urls SimpleAggregateFunction(groupUniqArrayArray, Array(String)),
    entry_url AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    end_url AggregateFunction(argMax, String, DateTime64(6, 'UTC')),

    -- Attribution values. Each one is an argMin state, i.e. the value that carries the
    -- smallest DateTime64 key.
    initial_referring_domain AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_source AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_campaign AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_medium AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_term AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_content AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gad_source AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gclsrc AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_dclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gbraid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_wbraid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_fbclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_msclkid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_twclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_li_fat_id AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_mc_cid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_igshid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_ttclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),

    -- Totals of pageview, autocapture and screen events.
    -- The *_count sums are not idempotent: a bug on EU double-counted events and produced
    -- wrong totals, so it is unclear whether they can be relied upon. The *_uniq columns
    -- therefore also track the unique event uuids. They are slower and cost more storage,
    -- but stay correct when events are double-counted, which makes them usable for verifying
    -- the counts and as a backup. Ideally the *_uniq columns can be dropped once the counts
    -- are trusted.
    pageview_count SimpleAggregateFunction(sum, Int64),
    pageview_uniq AggregateFunction(uniq, Nullable(UUID)),
    autocapture_count SimpleAggregateFunction(sum, Int64),
    autocapture_uniq AggregateFunction(uniq, Nullable(UUID)),
    screen_count SimpleAggregateFunction(sum, Int64),
    screen_uniq AggregateFunction(uniq, Nullable(UUID)),

    has_session_replay SimpleAggregateFunction(max, Bool) -- no code path writes True into this column yet
) ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/77f1df52-4b43-11e9-910f-b8ca3a9b9f3e_{shard}/posthog.raw_sessions', '{replica}')

-- bitShiftRight(session_id_v7, 80) keeps the top 48 bits of the id, which for a uuidv7 is its
-- Unix timestamp in milliseconds; intDiv(..., 1000) turns that into seconds. Rows are thus
-- partitioned by the calendar month encoded in the session id.
PARTITION BY toYYYYMM(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(session_id_v7, 80)), 1000)))
-- Sorting key: team, then the hour encoded in the session id, then the id hash and the id itself.
ORDER BY (
    team_id,
    toStartOfHour(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(session_id_v7, 80)), 1000))),
    cityHash64(session_id_v7),
    session_id_v7
)
SAMPLE BY cityHash64(session_id_v7)

'''
# ---
# name: test_create_table_query[sharded_session_recording_events]
Expand Down Expand Up @@ -2262,6 +2440,63 @@

'''
# ---
# name: test_create_table_query[writable_raw_sessions]
'''

-- Distributed table over cluster 'posthog' that forwards to posthog_test.sharded_raw_sessions,
-- using cityHash64(session_id_v7) as the sharding key.
CREATE TABLE IF NOT EXISTS writable_raw_sessions ON CLUSTER 'posthog'
(
    -- Session identity.
    team_id Int64,
    session_id_v7 UInt128, -- the session's uuidv7, held as a 128-bit integer

    -- argMax state keyed by a DateTime64 value, so the most recent distinct_id of the
    -- session is the one ClickHouse keeps. A distinct_id that changes partway through a
    -- session is considered acceptable here.
    distinct_id AggregateFunction(argMax, String, DateTime64(6, 'UTC')),

    -- Smallest and largest timestamp aggregated for the session.
    min_timestamp SimpleAggregateFunction(min, DateTime64(6, 'UTC')),
    max_timestamp SimpleAggregateFunction(max, DateTime64(6, 'UTC')),

    -- URLs: the de-duplicated set, plus the ones with the smallest / largest DateTime64 key.
    urls SimpleAggregateFunction(groupUniqArrayArray, Array(String)),
    entry_url AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    end_url AggregateFunction(argMax, String, DateTime64(6, 'UTC')),

    -- Attribution values. Each one is an argMin state, i.e. the value that carries the
    -- smallest DateTime64 key.
    initial_referring_domain AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_source AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_campaign AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_medium AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_term AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_content AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gad_source AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gclsrc AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_dclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gbraid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_wbraid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_fbclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_msclkid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_twclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_li_fat_id AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_mc_cid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_igshid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_ttclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),

    -- Totals of pageview, autocapture and screen events.
    -- The *_count sums are not idempotent: a bug on EU double-counted events and produced
    -- wrong totals, so it is unclear whether they can be relied upon. The *_uniq columns
    -- therefore also track the unique event uuids. They are slower and cost more storage,
    -- but stay correct when events are double-counted, which makes them usable for verifying
    -- the counts and as a backup. Ideally the *_uniq columns can be dropped once the counts
    -- are trusted.
    pageview_count SimpleAggregateFunction(sum, Int64),
    pageview_uniq AggregateFunction(uniq, Nullable(UUID)),
    autocapture_count SimpleAggregateFunction(sum, Int64),
    autocapture_uniq AggregateFunction(uniq, Nullable(UUID)),
    screen_count SimpleAggregateFunction(sum, Int64),
    screen_uniq AggregateFunction(uniq, Nullable(UUID)),

    has_session_replay SimpleAggregateFunction(max, Bool) -- no code path writes True into this column yet
) ENGINE = Distributed('posthog', 'posthog_test', 'sharded_raw_sessions', cityHash64(session_id_v7))

'''
# ---
# name: test_create_table_query[writable_session_recording_events]
'''

Expand Down Expand Up @@ -2942,6 +3177,72 @@

'''
# ---
# name: test_create_table_query_replicated_and_storage[sharded_raw_sessions]
'''

-- Per-shard storage table for raw session aggregates (ReplicatedAggregatingMergeTree).
CREATE TABLE IF NOT EXISTS sharded_raw_sessions ON CLUSTER 'posthog'
(
    -- Session identity.
    team_id Int64,
    session_id_v7 UInt128, -- the session's uuidv7, held as a 128-bit integer

    -- argMax state keyed by a DateTime64 value, so the most recent distinct_id of the
    -- session is the one ClickHouse keeps. A distinct_id that changes partway through a
    -- session is considered acceptable here.
    distinct_id AggregateFunction(argMax, String, DateTime64(6, 'UTC')),

    -- Smallest and largest timestamp aggregated for the session.
    min_timestamp SimpleAggregateFunction(min, DateTime64(6, 'UTC')),
    max_timestamp SimpleAggregateFunction(max, DateTime64(6, 'UTC')),

    -- URLs: the de-duplicated set, plus the ones with the smallest / largest DateTime64 key.
    urls SimpleAggregateFunction(groupUniqArrayArray, Array(String)),
    entry_url AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    end_url AggregateFunction(argMax, String, DateTime64(6, 'UTC')),

    -- Attribution values. Each one is an argMin state, i.e. the value that carries the
    -- smallest DateTime64 key.
    initial_referring_domain AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_source AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_campaign AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_medium AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_term AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_utm_content AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gad_source AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gclsrc AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_dclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_gbraid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_wbraid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_fbclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_msclkid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_twclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_li_fat_id AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_mc_cid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_igshid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),
    initial_ttclid AggregateFunction(argMin, String, DateTime64(6, 'UTC')),

    -- Totals of pageview, autocapture and screen events.
    -- The *_count sums are not idempotent: a bug on EU double-counted events and produced
    -- wrong totals, so it is unclear whether they can be relied upon. The *_uniq columns
    -- therefore also track the unique event uuids. They are slower and cost more storage,
    -- but stay correct when events are double-counted, which makes them usable for verifying
    -- the counts and as a backup. Ideally the *_uniq columns can be dropped once the counts
    -- are trusted.
    pageview_count SimpleAggregateFunction(sum, Int64),
    pageview_uniq AggregateFunction(uniq, Nullable(UUID)),
    autocapture_count SimpleAggregateFunction(sum, Int64),
    autocapture_uniq AggregateFunction(uniq, Nullable(UUID)),
    screen_count SimpleAggregateFunction(sum, Int64),
    screen_uniq AggregateFunction(uniq, Nullable(UUID)),

    has_session_replay SimpleAggregateFunction(max, Bool) -- no code path writes True into this column yet
) ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/77f1df52-4b43-11e9-910f-b8ca3a9b9f3e_{shard}/posthog.raw_sessions', '{replica}')

-- bitShiftRight(session_id_v7, 80) keeps the top 48 bits of the id, which for a uuidv7 is its
-- Unix timestamp in milliseconds; intDiv(..., 1000) turns that into seconds. Rows are thus
-- partitioned by the calendar month encoded in the session id.
PARTITION BY toYYYYMM(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(session_id_v7, 80)), 1000)))
-- Sorting key: team, then the hour encoded in the session id, then the id hash and the id itself.
ORDER BY (
    team_id,
    toStartOfHour(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(session_id_v7, 80)), 1000))),
    cityHash64(session_id_v7),
    session_id_v7
)
SAMPLE BY cityHash64(session_id_v7)

'''
# ---
# name: test_create_table_query_replicated_and_storage[sharded_session_recording_events]
'''

Expand Down

0 comments on commit 4cd1338

Please sign in to comment.