Skip to content

Commit

Permalink
Update query snapshots
Browse files Browse the repository at this point in the history
  • Loading branch information
github-actions[bot] committed Apr 26, 2024
1 parent e31fa29 commit eba7345
Show file tree
Hide file tree
Showing 4 changed files with 502 additions and 1,024 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
AND latest_1 <= latest_0 + INTERVAL 7 DAY
AND latest_1 <= latest_2
AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1
AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps,
AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps ,
if(isNotNull(latest_1)
AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
if(isNotNull(latest_2)
Expand Down Expand Up @@ -86,17 +86,19 @@
steps
HAVING steps = max_steps),
histogram_params AS
(/* Binning ensures that each sample belongs to a bin in results */ /* If bin_count is not a custom number, it's calculated in bin_count_expression */ SELECT ifNull(floor(min(step_1_average_conversion_time_inner)), 0) AS from_seconds,
ifNull(ceil(max(step_1_average_conversion_time_inner)), 1) AS to_seconds,
round(avg(step_1_average_conversion_time_inner), 2) AS average_conversion_time,
count() AS sample_count,
least(60, greatest(1, ceil(cbrt(ifNull(sample_count, 0))))) AS bin_count,
ceil((to_seconds - from_seconds) / bin_count) AS bin_width_seconds_raw, /* Use 60 seconds as fallback bin width in case of only one sample */ if(bin_width_seconds_raw > 0, bin_width_seconds_raw, 60) AS bin_width_seconds
FROM step_runs -- We only need to check step to_step here, because it depends on all the other ones being NOT NULL too
(SELECT ifNull(floor(min(step_1_average_conversion_time_inner)), 0) AS from_seconds,
ifNull(ceil(max(step_1_average_conversion_time_inner)), 1) AS to_seconds,
round(avg(step_1_average_conversion_time_inner), 2) AS average_conversion_time,
count() AS sample_count,
least(60, greatest(1, ceil(cbrt(ifNull(sample_count, 0))))) AS bin_count,
ceil((to_seconds - from_seconds) / bin_count) AS bin_width_seconds_raw,
if(bin_width_seconds_raw > 0, bin_width_seconds_raw, 60) AS bin_width_seconds
FROM step_runs
WHERE step_1_average_conversion_time_inner IS NOT NULL ),

WHERE step_1_average_conversion_time_inner IS NOT NULL ), /* Below CTEs make histogram_params columns available to the query below as straightforward identifiers */
(SELECT bin_width_seconds
FROM histogram_params) AS bin_width_seconds, /* bin_count is only made available as an identifier if it had to be calculated */
FROM histogram_params) AS bin_width_seconds,

(SELECT bin_count
FROM histogram_params) AS bin_count,

Expand All @@ -112,12 +114,12 @@
person_count,
histogram_average_conversion_time AS average_conversion_time
FROM
(/* Calculating bins from step runs */ SELECT histogram_from_seconds + floor((step_1_average_conversion_time_inner - histogram_from_seconds) / bin_width_seconds) * bin_width_seconds AS bin_from_seconds,
count() AS person_count
(SELECT histogram_from_seconds + floor((step_1_average_conversion_time_inner - histogram_from_seconds) / bin_width_seconds) * bin_width_seconds AS bin_from_seconds,
count() AS person_count
FROM step_runs
GROUP BY bin_from_seconds) results
RIGHT OUTER JOIN
(/* Making sure bin_count bins are returned */ /* Those not present in the results query due to lack of data simply get person_count 0 */ SELECT histogram_from_seconds + number * bin_width_seconds AS bin_from_seconds
(SELECT histogram_from_seconds + number * bin_width_seconds AS bin_from_seconds
FROM system.numbers
LIMIT ifNull(bin_count, 0) + 1) fill USING (bin_from_seconds)
ORDER BY bin_from_seconds SETTINGS max_ast_elements=1000000,
Expand Down Expand Up @@ -446,17 +448,19 @@
steps
HAVING steps = max_steps),
histogram_params AS
(/* Binning ensures that each sample belongs to a bin in results */ /* If bin_count is not a custom number, it's calculated in bin_count_expression */ SELECT ifNull(floor(min(step_1_average_conversion_time_inner)), 0) AS from_seconds,
ifNull(ceil(max(step_1_average_conversion_time_inner)), 1) AS to_seconds,
round(avg(step_1_average_conversion_time_inner), 2) AS average_conversion_time,
count() AS sample_count,
least(60, greatest(1, ceil(cbrt(ifNull(sample_count, 0))))) AS bin_count,
ceil((to_seconds - from_seconds) / bin_count) AS bin_width_seconds_raw, /* Use 60 seconds as fallback bin width in case of only one sample */ if(bin_width_seconds_raw > 0, bin_width_seconds_raw, 60) AS bin_width_seconds
FROM step_runs -- We only need to check step to_step here, because it depends on all the other ones being NOT NULL too
(SELECT ifNull(floor(min(step_1_average_conversion_time_inner)), 0) AS from_seconds,
ifNull(ceil(max(step_1_average_conversion_time_inner)), 1) AS to_seconds,
round(avg(step_1_average_conversion_time_inner), 2) AS average_conversion_time,
count() AS sample_count,
least(60, greatest(1, ceil(cbrt(ifNull(sample_count, 0))))) AS bin_count,
ceil((to_seconds - from_seconds) / bin_count) AS bin_width_seconds_raw,
if(bin_width_seconds_raw > 0, bin_width_seconds_raw, 60) AS bin_width_seconds
FROM step_runs
WHERE step_1_average_conversion_time_inner IS NOT NULL ),

WHERE step_1_average_conversion_time_inner IS NOT NULL ), /* Below CTEs make histogram_params columns available to the query below as straightforward identifiers */
(SELECT bin_width_seconds
FROM histogram_params) AS bin_width_seconds, /* bin_count is only made available as an identifier if it had to be calculated */
FROM histogram_params) AS bin_width_seconds,

(SELECT bin_count
FROM histogram_params) AS bin_count,

Expand All @@ -472,12 +476,12 @@
person_count,
histogram_average_conversion_time AS average_conversion_time
FROM
(/* Calculating bins from step runs */ SELECT histogram_from_seconds + floor((step_1_average_conversion_time_inner - histogram_from_seconds) / bin_width_seconds) * bin_width_seconds AS bin_from_seconds,
count() AS person_count
(SELECT histogram_from_seconds + floor((step_1_average_conversion_time_inner - histogram_from_seconds) / bin_width_seconds) * bin_width_seconds AS bin_from_seconds,
count() AS person_count
FROM step_runs
GROUP BY bin_from_seconds) results
RIGHT OUTER JOIN
(/* Making sure bin_count bins are returned */ /* Those not present in the results query due to lack of data simply get person_count 0 */ SELECT histogram_from_seconds + number * bin_width_seconds AS bin_from_seconds
(SELECT histogram_from_seconds + number * bin_width_seconds AS bin_from_seconds
FROM system.numbers
LIMIT ifNull(bin_count, 0) + 1) fill USING (bin_from_seconds)
ORDER BY bin_from_seconds SETTINGS max_ast_elements=1000000,
Expand All @@ -502,7 +506,7 @@
FROM
(SELECT *,
arraySort([latest_0,latest_1,latest_2]) as event_times,
arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps,
arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps ,
arraySort([latest_0,latest_1,latest_2]) as conversion_times,
if(isNotNull(conversion_times[2])
AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time,
Expand Down Expand Up @@ -552,7 +556,7 @@
WHERE step_0 = 1
UNION ALL SELECT *,
arraySort([latest_0,latest_1,latest_2]) as event_times,
arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps,
arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps ,
arraySort([latest_0,latest_1,latest_2]) as conversion_times,
if(isNotNull(conversion_times[2])
AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time,
Expand Down Expand Up @@ -602,7 +606,7 @@
WHERE step_0 = 1
UNION ALL SELECT *,
arraySort([latest_0,latest_1,latest_2]) as event_times,
arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps,
arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps ,
arraySort([latest_0,latest_1,latest_2]) as conversion_times,
if(isNotNull(conversion_times[2])
AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time,
Expand Down Expand Up @@ -654,17 +658,19 @@
steps
HAVING steps = max_steps),
histogram_params AS
(/* Binning ensures that each sample belongs to a bin in results */ /* If bin_count is not a custom number, it's calculated in bin_count_expression */ SELECT ifNull(floor(min(step_1_average_conversion_time_inner)), 0) AS from_seconds,
ifNull(ceil(max(step_1_average_conversion_time_inner)), 1) AS to_seconds,
round(avg(step_1_average_conversion_time_inner), 2) AS average_conversion_time,
count() AS sample_count,
least(60, greatest(1, ceil(cbrt(ifNull(sample_count, 0))))) AS bin_count,
ceil((to_seconds - from_seconds) / bin_count) AS bin_width_seconds_raw, /* Use 60 seconds as fallback bin width in case of only one sample */ if(bin_width_seconds_raw > 0, bin_width_seconds_raw, 60) AS bin_width_seconds
FROM step_runs -- We only need to check step to_step here, because it depends on all the other ones being NOT NULL too
(SELECT ifNull(floor(min(step_1_average_conversion_time_inner)), 0) AS from_seconds,
ifNull(ceil(max(step_1_average_conversion_time_inner)), 1) AS to_seconds,
round(avg(step_1_average_conversion_time_inner), 2) AS average_conversion_time,
count() AS sample_count,
least(60, greatest(1, ceil(cbrt(ifNull(sample_count, 0))))) AS bin_count,
ceil((to_seconds - from_seconds) / bin_count) AS bin_width_seconds_raw,
if(bin_width_seconds_raw > 0, bin_width_seconds_raw, 60) AS bin_width_seconds
FROM step_runs
WHERE step_1_average_conversion_time_inner IS NOT NULL ),

WHERE step_1_average_conversion_time_inner IS NOT NULL ), /* Below CTEs make histogram_params columns available to the query below as straightforward identifiers */
(SELECT bin_width_seconds
FROM histogram_params) AS bin_width_seconds, /* bin_count is only made available as an identifier if it had to be calculated */
FROM histogram_params) AS bin_width_seconds,

(SELECT bin_count
FROM histogram_params) AS bin_count,

Expand All @@ -680,12 +686,12 @@
person_count,
histogram_average_conversion_time AS average_conversion_time
FROM
(/* Calculating bins from step runs */ SELECT histogram_from_seconds + floor((step_1_average_conversion_time_inner - histogram_from_seconds) / bin_width_seconds) * bin_width_seconds AS bin_from_seconds,
count() AS person_count
(SELECT histogram_from_seconds + floor((step_1_average_conversion_time_inner - histogram_from_seconds) / bin_width_seconds) * bin_width_seconds AS bin_from_seconds,
count() AS person_count
FROM step_runs
GROUP BY bin_from_seconds) results
RIGHT OUTER JOIN
(/* Making sure bin_count bins are returned */ /* Those not present in the results query due to lack of data simply get person_count 0 */ SELECT histogram_from_seconds + number * bin_width_seconds AS bin_from_seconds
(SELECT histogram_from_seconds + number * bin_width_seconds AS bin_from_seconds
FROM system.numbers
LIMIT ifNull(bin_count, 0) + 1) fill USING (bin_from_seconds)
ORDER BY bin_from_seconds SETTINGS max_ast_elements=1000000,
Expand Down
27 changes: 9 additions & 18 deletions posthog/queries/test/__snapshots__/test_lifecycle.ambr
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
# serializer version: 1
# name: TestLifecycle.test_sampling
'''
WITH 'day' AS selected_period, -- enumerate all requested periods, so we can zero fill as needed.
-- NOTE: we use dateSub interval rather than seconds, which means we can handle,
-- for instance, month intervals which do not have a fixed number of seconds.
periods AS
WITH 'day' AS selected_period,
periods AS
(SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2020-01-19 23:59:59', 'UTC'))) AS start_of_period
FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2020-01-12 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2020-01-19 23:59:59', 'UTC') + INTERVAL 1 day))))
SELECT groupArray(start_of_period) AS date,
Expand All @@ -18,8 +16,7 @@
(SELECT periods.start_of_period as start_of_period,
toUInt16(0) AS counts,
status
FROM periods -- Zero fill for each status

FROM periods
CROSS JOIN
(SELECT status
FROM
Expand Down Expand Up @@ -77,10 +74,8 @@
# ---
# name: TestLifecycle.test_timezones
'''
WITH 'day' AS selected_period, -- enumerate all requested periods, so we can zero fill as needed.
-- NOTE: we use dateSub interval rather than seconds, which means we can handle,
-- for instance, month intervals which do not have a fixed number of seconds.
periods AS
WITH 'day' AS selected_period,
periods AS
(SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2020-01-19 23:59:59', 'UTC'))) AS start_of_period
FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2020-01-12 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2020-01-19 23:59:59', 'UTC') + INTERVAL 1 day))))
SELECT groupArray(start_of_period) AS date,
Expand All @@ -94,8 +89,7 @@
(SELECT periods.start_of_period as start_of_period,
toUInt16(0) AS counts,
status
FROM periods -- Zero fill for each status

FROM periods
CROSS JOIN
(SELECT status
FROM
Expand Down Expand Up @@ -153,10 +147,8 @@
# ---
# name: TestLifecycle.test_timezones.1
'''
WITH 'day' AS selected_period, -- enumerate all requested periods, so we can zero fill as needed.
-- NOTE: we use dateSub interval rather than seconds, which means we can handle,
-- for instance, month intervals which do not have a fixed number of seconds.
periods AS
WITH 'day' AS selected_period,
periods AS
(SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2020-01-19 23:59:59', 'US/Pacific'))) AS start_of_period
FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2020-01-12 00:00:00', 'US/Pacific')), dateTrunc('day', toDateTime('2020-01-19 23:59:59', 'US/Pacific') + INTERVAL 1 day))))
SELECT groupArray(start_of_period) AS date,
Expand All @@ -170,8 +162,7 @@
(SELECT periods.start_of_period as start_of_period,
toUInt16(0) AS counts,
status
FROM periods -- Zero fill for each status

FROM periods
CROSS JOIN
(SELECT status
FROM
Expand Down
Loading

0 comments on commit eba7345

Please sign in to comment.