Skip to content

Commit

Permalink
Merge branch 'aspicer/udf' of github.com:PostHog/posthog into aspicer/udf
Browse files Browse the repository at this point in the history
  • Loading branch information
aspicer committed Aug 21, 2024
2 parents 65f5944 + 35f2ad0 commit 519ec76
Showing 1 changed file with 202 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
# serializer version: 1
# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen
'''
-- Two-step funnel (sign up, then buy with $version = xyz) broken down by the $browser
-- event property. The query is a stack of nested subqueries; read it from the innermost
-- level (the events scan) outwards.
--
-- Level 8 (outermost): one row per final_prop. Sums the per-breakdown step counts and
-- reports average and median step-1 conversion time, with NaN mapped to NULL.
SELECT sum(step_1) AS step_1,
sum(step_2) AS step_2,
if(isNaN(avgArrayOrNull(step_1_conversion_time_array) AS inter_1_conversion), NULL, inter_1_conversion) AS step_1_average_conversion_time,
if(isNaN(medianArrayOrNull(step_1_conversion_time_array) AS inter_1_median), NULL, inter_1_median) AS step_1_median_conversion_time,
-- Breakdown values with row_number below 26 keep their own value; the rest share the Other bucket.
if(ifNull(less(row_number, 26), 0), prop, ['Other']) AS final_prop
FROM
-- Level 7: one row per prop. Counts rows that ended at step 1 vs step 2, collects the
-- conversion times, and numbers the breakdown values by descending step_2 count.
(SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1,
countIf(ifNull(equals(steps, 2), 0)) AS step_2,
groupArray(step_1_conversion_time) AS step_1_conversion_time_array,
prop AS prop,
row_number() OVER (
ORDER BY step_2 DESC) AS row_number
FROM
-- Level 6: grouped by (aggregation_target, steps, prop). The HAVING clause below keeps
-- only the group whose steps equals the maximum reached for that target and prop, and
-- min() picks the shortest conversion time inside the group.
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
prop AS prop,
prop AS prop,
min(step_1_conversion_time) AS step_1_conversion_time
FROM
-- Level 5: attaches max_steps, the highest steps value seen for each
-- (aggregation_target, prop) partition.
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
prop AS prop,
max(steps) OVER (PARTITION BY aggregation_target,
prop) AS max_steps,
step_1_conversion_time AS step_1_conversion_time,
prop AS prop
FROM
-- Level 4: derives steps and the conversion time for each row. Only rows that are
-- themselves a step_0 event survive (see the WHERE that closes this level).
(SELECT aggregation_target AS aggregation_target,
timestamp AS timestamp,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
latest_1 AS latest_1,
prop AS prop,
-- steps is 2 when latest_1 falls at or after latest_0 and no later than latest_0 plus 14 days, otherwise 1.
if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps,
-- Seconds between latest_0 and latest_1, or NULL when latest_1 is missing or beyond the 14 day limit.
if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
prop AS prop
FROM
-- Level 3: replaces latest_1 with a windowed minimum. The window is ordered by timestamp
-- DESC and spans from the start of the partition to the current row, so each row sees
-- the smallest latest_1 among rows of the same target and prop with an equal or later timestamp.
(SELECT aggregation_target AS aggregation_target,
timestamp AS timestamp,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
min(latest_1) OVER (PARTITION BY aggregation_target,
prop
ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
prop AS prop
FROM
-- Level 2: settles the final breakdown value. prop becomes prop_vals when that array
-- holds at least one non-empty string, and an array with a single empty string otherwise.
(SELECT timestamp AS timestamp,
aggregation_target AS aggregation_target,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
latest_1 AS latest_1,
prop_basic AS prop_basic,
prop,
prop_vals AS prop_vals,
if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) AS prop
FROM
-- Level 1 (innermost): scans events joined to the person id resolved for each distinct_id.
(SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
e__pdi.person_id AS aggregation_target,
-- step_0 flags sign up events; latest_0 carries the timestamp only on those rows.
if(equals(e.event, 'sign up'), 1, 0) AS step_0,
if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
-- step_1 flags buy events whose $version property, with surrounding double quotes stripped, equals xyz.
if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1,
if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
-- prop_basic is a one-element array holding the $browser property as a string (empty string when absent or null).
[ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', '')), '')] AS prop_basic,
prop_basic AS prop,
-- prop_vals: per aggregation_target, the prop of the earliest-timestamp row whose prop has a non-empty element.
argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals
FROM events AS e
INNER JOIN
-- Maps each distinct_id of team 2 to the person_id with the highest version, skipping
-- ids whose highest-version row has is_deleted set to something other than 0.
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
person_distinct_id2.distinct_id AS distinct_id
FROM person_distinct_id2
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
-- Row filter for level 1: team 2, timestamp within 2020-01-01 through 2020-01-08 (UTC, both
-- ends inclusive), event is buy or sign up, and the row matched at least one step.
-- The trailing parentheses also close levels 1, 2 and 3.
WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))))
-- Filter for level 4: keep only step_0 rows. Closes levels 4 and 5.
WHERE ifNull(equals(step_0, 1), 0)))
-- Grouping and filter for level 6.
GROUP BY aggregation_target,
steps,
prop
-- Null-safe equality: true when steps equals max(max_steps), or when both are NULL.
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
-- Grouping for level 7.
GROUP BY prop)
-- Grouping for level 8, followed by the row cap and per-query ClickHouse settings.
GROUP BY final_prop
LIMIT 100 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=23622320128,
allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step
'''
-- Two-step funnel (sign up, then buy with $version = xyz) broken down by the $browser
-- property taken from step 1 (buy) rows only. Each distinct step-1 value seen for a
-- target is fanned out into its own row through ARRAY JOIN. The query is a stack of
-- nested subqueries; read it from the innermost level (the events scan) outwards.
--
-- Level 8 (outermost): one row per final_prop. Sums the per-breakdown step counts and
-- reports average and median step-1 conversion time, with NaN mapped to NULL.
SELECT sum(step_1) AS step_1,
sum(step_2) AS step_2,
if(isNaN(avgArrayOrNull(step_1_conversion_time_array) AS inter_1_conversion), NULL, inter_1_conversion) AS step_1_average_conversion_time,
if(isNaN(medianArrayOrNull(step_1_conversion_time_array) AS inter_1_median), NULL, inter_1_median) AS step_1_median_conversion_time,
-- Breakdown values with row_number below 26 keep their own value; the rest share the Other bucket.
if(ifNull(less(row_number, 26), 0), prop, ['Other']) AS final_prop
FROM
-- Level 7: one row per prop. Counts rows that ended at step 1 vs step 2, collects the
-- conversion times, and numbers the breakdown values by descending step_2 count.
(SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1,
countIf(ifNull(equals(steps, 2), 0)) AS step_2,
groupArray(step_1_conversion_time) AS step_1_conversion_time_array,
prop AS prop,
row_number() OVER (
ORDER BY step_2 DESC) AS row_number
FROM
-- Level 6: grouped by (aggregation_target, steps, prop). The HAVING clause below keeps
-- only the group whose steps equals the maximum reached for that target and prop, and
-- min() picks the shortest conversion time inside the group.
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
prop AS prop,
prop AS prop,
min(step_1_conversion_time) AS step_1_conversion_time
FROM
-- Level 5: attaches max_steps, the highest steps value seen for each
-- (aggregation_target, prop) partition.
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
prop AS prop,
max(steps) OVER (PARTITION BY aggregation_target,
prop) AS max_steps,
step_1_conversion_time AS step_1_conversion_time,
prop AS prop
FROM
-- Level 4: derives steps and the conversion time for each row. Only rows that are
-- themselves a step_0 event survive (see the WHERE that closes this level).
(SELECT aggregation_target AS aggregation_target,
timestamp AS timestamp,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
latest_1 AS latest_1,
prop AS prop,
-- steps is 2 when latest_1 falls at or after latest_0 and no later than latest_0 plus 14 days, otherwise 1.
if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps,
-- Seconds between latest_0 and latest_1, or NULL when latest_1 is missing or beyond the 14 day limit.
if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
prop AS prop
FROM
-- Level 3: replaces latest_1 with a windowed minimum. The window is ordered by timestamp
-- DESC and spans from the start of the partition to the current row, so each row sees
-- the smallest latest_1 among rows of the same target and prop with an equal or later timestamp.
(SELECT aggregation_target AS aggregation_target,
timestamp AS timestamp,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
latest_1 AS latest_1,
prop_basic AS prop_basic,
prop_0 AS prop_0,
prop_1 AS prop_1,
prop,
prop_vals AS prop_vals,
prop
FROM
-- Level 2 is the SELECT above: it passes the level-1 columns through, and its
-- ARRAY JOIN (after the level-1 WHERE) expands prop_vals into one row per element, exposed as prop.
-- Level 1 (innermost): scans events joined to the person id resolved for each distinct_id.
(SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
e__pdi.person_id AS aggregation_target,
-- step_0 flags sign up events; latest_0 carries the timestamp only on those rows.
if(equals(e.event, 'sign up'), 1, 0) AS step_0,
if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
-- step_1 flags buy events whose $version property, with surrounding double quotes stripped, equals xyz.
if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1,
if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
-- prop_basic is a one-element array holding the $browser property as a string (empty string when absent or null).
[ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', '')), '')] AS prop_basic,
-- prop_0 and prop_1 carry prop_basic only on rows matching the respective step, and an empty array elsewhere.
if(ifNull(equals(step_0, 1), 0), prop_basic, []) AS prop_0,
if(ifNull(equals(step_1, 1), 0), prop_basic, []) AS prop_1,
-- The breakdown value is taken from step 1 rows.
prop_1 AS prop,
-- prop_vals: the distinct prop arrays seen for each aggregation_target.
groupUniqArray(prop) OVER (PARTITION BY aggregation_target) AS prop_vals
FROM events AS e
INNER JOIN
-- Maps each distinct_id of team 2 to the person_id with the highest version, skipping
-- ids whose highest-version row has is_deleted set to something other than 0.
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
person_distinct_id2.distinct_id AS distinct_id
FROM person_distinct_id2
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
-- Row filter for level 1: team 2, timestamp within 2020-01-01 through 2020-01-08 (UTC, both
-- ends inclusive), event is buy or sign up, and the row matched at least one step.
-- The trailing parenthesis closes level 1; ARRAY JOIN then belongs to level 2.
WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))) ARRAY
JOIN prop_vals AS prop
-- Filter for level 2: drop rows whose expanded prop is the empty array. Closes levels 2 and 3.
WHERE ifNull(notEquals(prop, []), isNotNull(prop)
or isNotNull([]))))
-- Filter for level 4: keep only step_0 rows. Closes levels 4 and 5.
WHERE ifNull(equals(step_0, 1), 0)))
-- Grouping and filter for level 6.
GROUP BY aggregation_target,
steps,
prop
-- Null-safe equality: true when steps equals max(max_steps), or when both are NULL.
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
-- Grouping for level 7.
GROUP BY prop)
-- Grouping for level 8, followed by the row cap and per-query ClickHouse settings.
GROUP BY final_prop
LIMIT 100 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=23622320128,
allow_experimental_analyzer=1
'''
# ---

0 comments on commit 519ec76

Please sign in to comment.