From f6a9e25bd19d56df58af6d44206ff4a38be3287d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Oberm=C3=BCller?= Date: Wed, 14 Feb 2024 13:42:35 +0100 Subject: [PATCH] test(hogql): add breakdown tests for unordered funnel (#20335) --- .../test/__snapshots__/test_funnel.ambr | 674 +++--- .../__snapshots__/test_funnel_strict.ambr | 678 +++--- .../__snapshots__/test_funnel_unordered.ambr | 2147 +++++++++++++++++ .../insights/funnels/test/breakdown_cases.py | 92 +- .../insights/funnels/test/test_funnel.py | 12 +- .../funnels/test/test_funnel_strict.py | 12 +- .../funnels/test/test_funnel_unordered.py | 1221 +++++----- 7 files changed, 3529 insertions(+), 1307 deletions(-) create mode 100644 posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index 77c4f901645c2..b960faa708110 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -350,7 +350,7 @@ if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 1)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 20)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, @@ -871,7 +871,7 @@ if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, (SELECT person_static_cohort.person_id AS person_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 2)))), 0)), 1, 0) AS step_0, + WHERE 
and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 21)))), 0)), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 @@ -953,20 +953,19 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen ''' - SELECT ifNull(e__group_0.properties___industry, '') AS value, + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, count(*) AS count FROM events AS e - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e.person_id)) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), in(e.event, 
tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) GROUP BY value ORDER BY count DESC, value DESC LIMIT 26 @@ -975,24 +974,19 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events.1 +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.1 ''' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, - countIf(ifNull(equals(steps, 3), 0)) AS step_3, avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, prop AS prop, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - median(step_2_conversion_time) AS step_2_median_conversion_time_inner, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -1001,7 +995,6 @@ max(steps) OVER (PARTITION BY aggregation_target, prop) AS max_steps, step_1_conversion_time AS step_1_conversion_time, - step_2_conversion_time AS step_2_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -1010,12 +1003,9 @@ latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, - step_2 AS step_2, - latest_2 AS latest_2, prop AS prop, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, 
toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -1023,71 +1013,40 @@ step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, - latest_1 AS latest_1, - step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target, + min(latest_1) OVER (PARTITION BY aggregation_target, prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, - prop AS prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, - step_2 AS step_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, - prop AS prop + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) AS prop FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - 
min(latest_1) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, - step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, - if(has(['technology', 'finance'], prop), prop, 'Other') AS prop - FROM - (SELECT timestamp AS timestamp, - aggregation_target AS aggregation_target, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - step_2 AS step_2, - latest_2 AS latest_2, - prop_basic AS prop_basic, - prop, - prop_vals AS prop_vals, - prop_vals AS prop - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(equals(e.event, 'sign up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'play movie'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - if(equals(e.event, 'buy'), 1, 0) AS step_2, - if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, - ifNull(e__group_0.properties___industry, '') AS prop_basic, - prop_basic AS prop, - argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals - FROM events AS e - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 
23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps, @@ -1100,26 +1059,19 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 +# name: 
TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step ''' - SELECT ifNull(e__group_0.properties___industry, '') AS value, + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, count(*) AS count FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id, - person_overrides.old_person_id AS old_person_id - FROM person_overrides - WHERE equals(person_overrides.team_id, 2) - GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id) - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id))) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), 
ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) GROUP BY value ORDER BY count DESC, value DESC LIMIT 26 @@ -1128,24 +1080,19 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2.1 +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.1 ''' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, - countIf(ifNull(equals(steps, 3), 0)) AS step_3, avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, prop AS prop, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - median(step_2_conversion_time) AS step_2_median_conversion_time_inner, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -1154,7 +1101,6 @@ max(steps) OVER (PARTITION BY aggregation_target, prop) AS max_steps, step_1_conversion_time AS step_1_conversion_time, - step_2_conversion_time AS step_2_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -1163,12 +1109,9 @@ latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, - step_2 AS step_2, - latest_2 AS latest_2, 
prop AS prop, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -1176,71 +1119,47 @@ step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, - latest_1 AS latest_1, - step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target, + min(latest_1) OVER (PARTITION BY aggregation_target, prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, - prop AS prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([['Safari'], ['Mac'], ['Chrome']], prop), prop, ['Other']) AS prop FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, - step_2 AS step_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, - prop AS prop + prop_basic AS prop_basic, + prop_0 AS prop_0, + prop_1 AS prop_1, + prop, + prop_vals AS prop_vals, + prop FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS 
timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, - step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, - if(has(['technology', 'finance'], prop), prop, 'Other') AS prop - FROM - (SELECT timestamp AS timestamp, - aggregation_target AS aggregation_target, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - step_2 AS step_2, - latest_2 AS latest_2, - prop_basic AS prop_basic, - prop, - prop_vals AS prop_vals, - prop_vals AS prop - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(equals(e.event, 'sign up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'play movie'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - if(equals(e.event, 'buy'), 1, 0) AS step_2, - if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, - ifNull(e__group_0.properties___industry, '') AS prop_basic, - prop_basic AS prop, - argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals - FROM events AS e - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), 
lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + if(ifNull(equals(step_0, 1), 0), prop_basic, []) AS prop_0, + if(ifNull(equals(step_1, 1), 0), prop_basic, []) AS prop_1, + prop_1 AS prop, + groupUniqArray(prop) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))) ARRAY + JOIN prop_vals AS prop + WHERE ifNull(notEquals(prop, []), isNotNull(prop) + or isNotNull([])))) WHERE ifNull(equals(step_0, 1), 0))) GROUP 
BY aggregation_target, steps, @@ -1253,9 +1172,9 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen +# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, count(*) AS count FROM events AS e INNER JOIN @@ -1274,7 +1193,7 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.1 +# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot.1 ''' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, @@ -1316,7 +1235,7 @@ min(latest_1) OVER (PARTITION BY aggregation_target, prop ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, - if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop + if(has([['', ''], ['alakazam', ''], ['Safari', 'xyz'], ['Mac', ''], ['Chrome', 'xyz'], ['0', '0'], ['', 'no-mac']], prop), prop, ['Other']) AS prop FROM (SELECT timestamp AS timestamp, aggregation_target AS aggregation_target, @@ -1327,15 +1246,15 @@ prop_basic AS prop_basic, prop, prop_vals AS prop_vals, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) AS prop + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['', '']) AS prop FROM (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, if(equals(e.event, 'sign up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(and(equals(e.event, 'buy'), 
ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(equals(e.event, 'buy'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, prop_basic AS prop, argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals FROM events AS e @@ -1359,19 +1278,173 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step +# name: TestFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events + ''' + SELECT ifNull(e__group_0.properties___industry, '') AS value, + count(*) AS count + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 
SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + countIf(ifNull(equals(steps, 3), 0)) AS step_3, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + median(step_2_conversion_time) AS step_2_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + 
if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + if(has(['technology', 'finance'], prop), prop, 'Other') AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + 
if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(e__group_0.properties___industry, '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + SELECT ifNull(e__group_0.properties___industry, '') AS value, count(*) AS count FROM events AS e - INNER JOIN - (SELECT 
argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) + LEFT OUTER JOIN + (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id, + person_overrides.old_person_id AS old_person_id + FROM person_overrides + WHERE equals(person_overrides.team_id, 2) + GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id))) GROUP BY 
value ORDER BY count DESC, value DESC LIMIT 26 @@ -1380,19 +1453,24 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.1 +# name: TestFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2.1 ''' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, + countIf(ifNull(equals(steps, 3), 0)) AS step_3, avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, prop AS prop, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + median(step_2_conversion_time) AS step_2_median_conversion_time_inner, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -1401,6 +1479,7 @@ max(steps) OVER (PARTITION BY aggregation_target, prop) AS max_steps, step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -1409,9 +1488,12 @@ latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, prop AS prop, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, 
toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -1419,47 +1501,71 @@ step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, - if(has([['Safari'], ['Mac'], ['Chrome']], prop), prop, ['Other']) AS prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop FROM - (SELECT timestamp AS timestamp, - aggregation_target AS aggregation_target, + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, - prop_basic AS prop_basic, - prop_0 AS prop_0, - prop_1 AS prop_1, - prop, - prop_vals AS prop_vals, - prop + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(equals(e.event, 'sign up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS 
latest_1, - [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, - if(ifNull(equals(step_0, 1), 0), prop_basic, []) AS prop_0, - if(ifNull(equals(step_1, 1), 0), prop_basic, []) AS prop_1, - prop_1 AS prop, - groupUniqArray(prop) OVER (PARTITION BY aggregation_target) AS prop_vals - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))) ARRAY - JOIN prop_vals AS prop - WHERE ifNull(notEquals(prop, []), isNotNull(prop) - or isNotNull([])))) + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + if(has(['technology', 'finance'], prop), prop, 'Other') AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS 
latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(e__group_0.properties___industry, '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps, @@ -1472,7 +1578,7 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelBreakdown.test_funnel_breakdown_group +# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group ''' SELECT ifNull(e__group_0.properties___industry, '') AS value, count(*) AS count @@ -1501,7 +1607,7 @@ allow_experimental_object_type=1 ''' # --- -# name: 
TestFunnelBreakdown.test_funnel_breakdown_group.1 +# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.1 ''' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, @@ -1633,7 +1739,7 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelBreakdown.test_funnel_breakdown_group.2 +# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.2 ''' SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, @@ -1656,7 +1762,7 @@ OFFSET 0 ''' # --- -# name: TestFunnelBreakdown.test_funnel_breakdown_group.3 +# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.3 ''' SELECT aggregation_target AS actor_id @@ -1776,7 +1882,7 @@ max_expanded_ast_elements=1000000 ''' # --- -# name: TestFunnelBreakdown.test_funnel_breakdown_group.4 +# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.4 ''' SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, @@ -1799,7 +1905,7 @@ OFFSET 0 ''' # --- -# name: TestFunnelBreakdown.test_funnel_breakdown_group.5 +# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.5 ''' SELECT aggregation_target AS actor_id @@ -1919,7 +2025,7 @@ max_expanded_ast_elements=1000000 ''' # --- -# name: TestFunnelBreakdown.test_funnel_breakdown_group.6 +# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.6 ''' SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, @@ -1942,7 +2048,7 @@ OFFSET 0 ''' # --- -# name: TestFunnelBreakdown.test_funnel_breakdown_group.7 +# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.7 ''' SELECT aggregation_target AS actor_id @@ -2062,7 +2168,7 @@ max_expanded_ast_elements=1000000 ''' # --- -# name: TestFunnelBreakdown.test_funnel_breakdown_group.8 +# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.8 ''' SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, @@ 
-2085,7 +2191,7 @@ OFFSET 0 ''' # --- -# name: TestFunnelBreakdown.test_funnel_breakdown_group.9 +# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.9 ''' SELECT aggregation_target AS actor_id @@ -2205,109 +2311,3 @@ max_expanded_ast_elements=1000000 ''' # --- -# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot - ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, - count(*) AS count - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot.1 - ''' - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - 
avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - max(steps) OVER (PARTITION BY aggregation_target, - prop) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop AS prop, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, - if(has([['', ''], ['alakazam', ''], ['Safari', 'xyz'], ['Mac', ''], ['Chrome', 'xyz'], ['0', '0'], ['', 'no-mac']], prop), prop, ['Other']) AS prop - FROM - (SELECT timestamp AS timestamp, - aggregation_target AS aggregation_target, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop_basic AS prop_basic, - prop, - prop_vals AS prop_vals, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['', '']) AS prop - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(equals(e.event, 'sign up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'buy'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) 
AS latest_1, - [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, - prop_basic AS prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps, - prop - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - GROUP BY prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr index 75f2c0eac88db..5b12c1d8d00e0 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr @@ -1,18 +1,17 @@ # serializer version: 1 -# name: 
TestFunnelStrictStepsBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events +# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen ''' - SELECT ifNull(e__group_0.properties___industry, '') AS value, + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, count(*) AS count FROM events AS e - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e.person_id)) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) GROUP BY value ORDER BY count DESC, value DESC LIMIT 26 @@ -21,24 +20,19 @@ 
allow_experimental_object_type=1 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events.1 +# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.1 ''' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, - countIf(ifNull(equals(steps, 3), 0)) AS step_3, avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, prop AS prop, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - median(step_2_conversion_time) AS step_2_median_conversion_time_inner, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -47,7 +41,6 @@ max(steps) OVER (PARTITION BY aggregation_target, prop) AS max_steps, step_1_conversion_time AS step_1_conversion_time, - step_2_conversion_time AS step_2_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -56,84 +49,49 @@ latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, - step_2 AS step_2, - latest_2 AS latest_2, prop AS prop, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, - if(and(isNotNull(latest_1), 
ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, - prop AS prop + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, - latest_1 AS latest_1, - step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target, + min(latest_1) OVER (PARTITION BY aggregation_target, prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, - prop AS prop + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, + if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, - step_2 AS step_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, - prop AS prop + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) AS prop FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, 
- step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, - if(has(['technology', 'finance'], prop), prop, 'Other') AS prop - FROM - (SELECT timestamp AS timestamp, - aggregation_target AS aggregation_target, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - step_2 AS step_2, - latest_2 AS latest_2, - prop_basic AS prop_basic, - prop, - prop_vals AS prop_vals, - prop_vals AS prop - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(equals(e.event, 'sign up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'play movie'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - if(equals(e.event, 'buy'), 1, 0) AS step_2, - if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, - ifNull(e__group_0.properties___industry, '') AS prop_basic, - prop_basic AS prop, - argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals - FROM events AS e - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), 
ifNull(equals(step_2, 1), 0)))))))) + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps, @@ -146,26 +104,19 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 +# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step ''' - SELECT ifNull(e__group_0.properties___industry, '') AS value, + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', 
''), '')] AS value, count(*) AS count FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id, - person_overrides.old_person_id AS old_person_id - FROM person_overrides - WHERE equals(person_overrides.team_id, 2) - GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id) - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id))) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), 
toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) GROUP BY value ORDER BY count DESC, value DESC LIMIT 26 @@ -174,24 +125,19 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2.1 +# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.1 ''' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, - countIf(ifNull(equals(steps, 3), 0)) AS step_3, avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, prop AS prop, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - median(step_2_conversion_time) AS step_2_median_conversion_time_inner, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -200,7 +146,6 @@ max(steps) OVER (PARTITION BY aggregation_target, prop) AS max_steps, step_1_conversion_time AS step_1_conversion_time, - step_2_conversion_time AS step_2_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -209,84 +154,56 @@ latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, - step_2 AS step_2, - latest_2 AS latest_2, prop AS prop, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, 
if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, - prop AS prop + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, - latest_1 AS latest_1, - step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target, + min(latest_1) OVER (PARTITION BY aggregation_target, prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, - prop AS prop + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, + if(has([['Safari'], ['Mac'], ['Chrome']], prop), prop, ['Other']) AS prop FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, - step_2 AS step_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, - prop AS prop + prop_basic AS prop_basic, + prop_0 AS prop_0, + prop_1 AS prop_1, + prop, + prop_vals AS prop_vals, + prop FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER 
(PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, - step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, - if(has(['technology', 'finance'], prop), prop, 'Other') AS prop - FROM - (SELECT timestamp AS timestamp, - aggregation_target AS aggregation_target, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - step_2 AS step_2, - latest_2 AS latest_2, - prop_basic AS prop_basic, - prop, - prop_vals AS prop_vals, - prop_vals AS prop - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(equals(e.event, 'sign up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'play movie'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - if(equals(e.event, 'buy'), 1, 0) AS step_2, - if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, - ifNull(e__group_0.properties___industry, '') AS prop_basic, - prop_basic AS prop, - argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals - FROM events AS e - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), 
in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + if(ifNull(equals(step_0, 1), 0), prop_basic, []) AS prop_0, + if(ifNull(equals(step_1, 1), 0), prop_basic, []) AS prop_1, + prop_1 AS prop, + groupUniqArray(prop) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))))) ARRAY + JOIN prop_vals AS prop + WHERE ifNull(notEquals(prop, []), isNotNull(prop) + or isNotNull([])))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps, @@ -299,9 +216,9 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen +# name: 
TestFunnelStrictStepsBreakdown.test_funnel_step_multiple_breakdown_snapshot ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, count(*) AS count FROM events AS e INNER JOIN @@ -320,7 +237,7 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.1 +# name: TestFunnelStrictStepsBreakdown.test_funnel_step_multiple_breakdown_snapshot.1 ''' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, @@ -361,7 +278,7 @@ min(latest_1) OVER (PARTITION BY aggregation_target, prop ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, - if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop + if(has([['', ''], ['alakazam', ''], ['Safari', 'xyz'], ['Mac', ''], ['Chrome', 'xyz'], ['0', '0'], ['', 'no-mac']], prop), prop, ['Other']) AS prop FROM (SELECT timestamp AS timestamp, aggregation_target AS aggregation_target, @@ -372,15 +289,15 @@ prop_basic AS prop_basic, prop, prop_vals AS prop_vals, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) AS prop + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['', '']) AS prop FROM (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, if(equals(e.event, 'sign up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(equals(e.event, 'buy'), 1, 0) AS 
step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, prop_basic AS prop, argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals FROM events AS e @@ -404,19 +321,173 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step +# name: TestStrictFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events + ''' + SELECT ifNull(e__group_0.properties___industry, '') AS value, + count(*) AS count + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: 
TestStrictFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + countIf(ifNull(equals(steps, 3), 0)) AS step_3, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + median(step_2_conversion_time) AS step_2_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 
dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + if(has(['technology', 'finance'], prop), prop, 'Other') AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS 
step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(e__group_0.properties___industry, '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestStrictFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + SELECT ifNull(e__group_0.properties___industry, '') AS value, count(*) AS count FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id 
AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) + LEFT OUTER JOIN + (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id, + person_overrides.old_person_id AS old_person_id + FROM person_overrides + WHERE equals(person_overrides.team_id, 2) + GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id))) GROUP BY value ORDER BY count DESC, value DESC LIMIT 26 @@ -425,19 +496,24 @@ allow_experimental_object_type=1 ''' # --- -# 
name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.1 +# name: TestStrictFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2.1 ''' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, + countIf(ifNull(equals(steps, 3), 0)) AS step_3, avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, prop AS prop, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + median(step_2_conversion_time) AS step_2_median_conversion_time_inner, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -446,6 +522,7 @@ max(steps) OVER (PARTITION BY aggregation_target, prop) AS max_steps, step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, prop AS prop FROM (SELECT aggregation_target AS aggregation_target, @@ -454,56 +531,84 @@ latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, prop AS prop, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), 
ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, prop - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, - if(has([['Safari'], ['Mac'], ['Chrome']], prop), prop, ['Other']) AS prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop FROM - (SELECT timestamp AS timestamp, - aggregation_target AS aggregation_target, + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, - prop_basic AS prop_basic, - prop_0 AS prop_0, - prop_1 AS prop_1, - prop, - prop_vals AS prop_vals, - prop + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(equals(e.event, 'sign up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 
'xyz'), 0)), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, - if(ifNull(equals(step_0, 1), 0), prop_basic, []) AS prop_0, - if(ifNull(equals(step_1, 1), 0), prop_basic, []) AS prop_1, - prop_1 AS prop, - groupUniqArray(prop) OVER (PARTITION BY aggregation_target) AS prop_vals - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))))) ARRAY - JOIN prop_vals AS prop - WHERE ifNull(notEquals(prop, []), isNotNull(prop) - or isNotNull([])))) + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + if(has(['technology', 'finance'], prop), prop, 'Other') AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS 
prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(e__group_0.properties___industry, '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps, @@ -516,7 +621,7 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_group +# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group ''' SELECT ifNull(e__group_0.properties___industry, '') AS value, count(*) AS count @@ -545,7 +650,7 @@ allow_experimental_object_type=1 ''' # --- -# name: 
TestFunnelStrictStepsBreakdown.test_funnel_breakdown_group.1 +# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.1 ''' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, @@ -677,7 +782,7 @@ allow_experimental_object_type=1 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_group.2 +# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.2 ''' SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, @@ -700,7 +805,7 @@ OFFSET 0 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_group.3 +# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.3 ''' SELECT aggregation_target AS actor_id @@ -797,7 +902,7 @@ max_expanded_ast_elements=1000000 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_group.4 +# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.4 ''' SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, @@ -820,7 +925,7 @@ OFFSET 0 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_group.5 +# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.5 ''' SELECT aggregation_target AS actor_id @@ -917,7 +1022,7 @@ max_expanded_ast_elements=1000000 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_group.6 +# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.6 ''' SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, @@ -940,7 +1045,7 @@ OFFSET 0 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_group.7 +# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.7 ''' SELECT aggregation_target AS actor_id @@ -1037,7 +1142,7 @@ max_expanded_ast_elements=1000000 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_group.8 +# name: 
TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.8 ''' SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, @@ -1060,7 +1165,7 @@ OFFSET 0 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_group.9 +# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.9 ''' SELECT aggregation_target AS actor_id @@ -1157,108 +1262,3 @@ max_expanded_ast_elements=1000000 ''' # --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_step_multiple_breakdown_snapshot - ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, - count(*) AS count - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelStrictStepsBreakdown.test_funnel_step_multiple_breakdown_snapshot.1 - ''' - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS 
step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - max(steps) OVER (PARTITION BY aggregation_target, - prop) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop AS prop, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, - if(has([['', ''], ['alakazam', ''], ['Safari', 'xyz'], ['Mac', ''], ['Chrome', 'xyz'], ['0', '0'], ['', 'no-mac']], prop), prop, ['Other']) AS prop - FROM - (SELECT timestamp AS timestamp, - aggregation_target AS aggregation_target, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop_basic AS prop_basic, - prop, - prop_vals AS prop_vals, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['', '']) AS prop - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - 
if(equals(e.event, 'sign up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'buy'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, - prop_basic AS prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps, - prop - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - GROUP BY prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr new file mode 100644 index 0000000000000..214583b03f081 --- /dev/null +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr @@ -0,0 +1,2147 @@ +# serializer version: 1 +# name: 
TestFunnelUnorderedStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelUnorderedStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS 
step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + arraySort([latest_0, latest_1]) AS event_times, + arraySum([if(and(ifNull(less(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 1, 0), 1]) AS steps, + arraySort([latest_0, latest_1]) AS conversion_times, + if(and(isNotNull(conversion_times[2]), ifNull(lessOrEquals(conversion_times[2], plus(conversion_times[1], toIntervalDay(14))), 0)), dateDiff('second', conversion_times[1], conversion_times[2]), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + prop_basic 
AS prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))) + WHERE ifNull(equals(step_0, 1), 0) + UNION ALL SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + arraySort([latest_0, latest_1]) AS event_times, + arraySum([if(and(ifNull(less(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 1, 0), 1]) AS steps, + arraySort([latest_0, latest_1]) AS conversion_times, + if(and(isNotNull(conversion_times[2]), ifNull(lessOrEquals(conversion_times[2], plus(conversion_times[1], toIntervalDay(14))), 0)), dateDiff('second', conversion_times[1], conversion_times[2]), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), 
prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'sign up'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, 
max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelUnorderedStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelUnorderedStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner + 
FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + arraySort([latest_0, latest_1]) AS event_times, + arraySum([if(and(ifNull(less(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 1, 0), 1]) AS steps, + arraySort([latest_0, latest_1]) AS conversion_times, + if(and(isNotNull(conversion_times[2]), ifNull(lessOrEquals(conversion_times[2], plus(conversion_times[1], toIntervalDay(14))), 0)), dateDiff('second', conversion_times[1], conversion_times[2]), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop_0 AS prop_0, + prop_1 AS prop_1, + prop, + prop_vals AS prop_vals, + prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) 
AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + if(ifNull(equals(step_0, 1), 0), prop_basic, []) AS prop_0, + if(ifNull(equals(step_1, 1), 0), prop_basic, []) AS prop_1, + prop_1 AS prop, + groupUniqArray(prop) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))) ARRAY + JOIN prop_vals AS prop + WHERE ifNull(notEquals(prop, []), isNotNull(prop) + or isNotNull([])))) + WHERE ifNull(equals(step_0, 1), 0) + UNION ALL SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + arraySort([latest_0, latest_1]) AS event_times, + arraySum([if(and(ifNull(less(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 1, 0), 1]) AS steps, + arraySort([latest_0, latest_1]) AS conversion_times, + if(and(isNotNull(conversion_times[2]), ifNull(lessOrEquals(conversion_times[2], plus(conversion_times[1], toIntervalDay(14))), 0)), dateDiff('second', conversion_times[1], conversion_times[2]), NULL) AS step_1_conversion_time + FROM + (SELECT 
aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop_0 AS prop_0, + prop_1 AS prop_1, + prop, + prop_vals AS prop_vals, + prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'sign up'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + if(ifNull(equals(step_0, 1), 0), prop_basic, []) AS prop_0, + if(ifNull(equals(step_1, 1), 0), prop_basic, []) AS prop_1, + prop_1 AS prop, + groupUniqArray(prop) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), 
toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))) ARRAY + JOIN prop_vals AS prop + WHERE ifNull(notEquals(prop, []), isNotNull(prop) + or isNotNull([])))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelUnorderedStepsBreakdown.test_funnel_step_multiple_breakdown_snapshot + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: 
TestFunnelUnorderedStepsBreakdown.test_funnel_step_multiple_breakdown_snapshot.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + arraySort([latest_0, latest_1]) AS event_times, + arraySum([if(and(ifNull(less(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 1, 0), 1]) AS steps, + arraySort([latest_0, latest_1]) AS conversion_times, + if(and(isNotNull(conversion_times[2]), ifNull(lessOrEquals(conversion_times[2], plus(conversion_times[1], toIntervalDay(14))), 0)), dateDiff('second', conversion_times[1], conversion_times[2]), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([['', ''], ['alakazam', ''], ['Safari', 'xyz'], ['Mac', ''], ['Chrome', 'xyz'], ['0', '0'], ['', 'no-mac']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + 
aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['', '']) AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'buy'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))) + WHERE ifNull(equals(step_0, 1), 0) + UNION ALL SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS 
latest_1, + prop AS prop, + arraySort([latest_0, latest_1]) AS event_times, + arraySum([if(and(ifNull(less(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 1, 0), 1]) AS steps, + arraySort([latest_0, latest_1]) AS conversion_times, + if(and(isNotNull(conversion_times[2]), ifNull(lessOrEquals(conversion_times[2], plus(conversion_times[1], toIntervalDay(14))), 0)), dateDiff('second', conversion_times[1], conversion_times[2]), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([['', ''], ['alakazam', ''], ['Safari', 'xyz'], ['Mac', ''], ['Chrome', 'xyz'], ['0', '0'], ['', 'no-mac']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['', '']) AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'buy'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'sign up'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS 
prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events + ''' + SELECT ifNull(e__group_0.properties___industry, '') AS value, + count(*) AS count + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), 
toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + countIf(ifNull(equals(steps, 3), 0)) AS step_3, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + median(step_2_conversion_time) AS step_2_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, 
toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + if(has(['technology', 'finance'], prop), prop, 'Other') AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals 
AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(e__group_0.properties___industry, '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 + ''' + SELECT ifNull(e__group_0.properties___industry, '') AS value, + count(*) AS count + FROM events 
AS e + LEFT OUTER JOIN + (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id, + person_overrides.old_person_id AS old_person_id + FROM person_overrides + WHERE equals(person_overrides.team_id, 2) + GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + countIf(ifNull(equals(steps, 3), 0)) AS step_3, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, + prop AS prop + FROM + (SELECT 
aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + median(step_2_conversion_time) AS step_2_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS 
prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + if(has(['technology', 'finance'], prop), prop, 'Other') AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(e__group_0.properties___industry, '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + 
groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group + ''' + SELECT ifNull(e__group_0.properties___industry, '') AS value, + count(*) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), in(e.event, 
tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + countIf(ifNull(equals(steps, 3), 0)) AS step_3, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + median(step_2_conversion_time) AS step_2_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), 
ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + if(has(['technology', 'finance'], prop), prop, 'Other') AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + 
step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(e__group_0.properties___industry, '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), 
or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.10 + ''' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.11 + ''' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.12 + ''' + + SELECT 
replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.13 + ''' + + SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner, + prop + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target, + prop) as max_steps, + step_1_conversion_time, + step_2_conversion_time, + prop + FROM + (SELECT *, + arraySort([latest_0,latest_1,latest_2]) as event_times, + arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , + arraySort([latest_0,latest_1,latest_2]) as conversion_times, + if(isNotNull(conversion_times[2]) + AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, + if(isNotNull(conversion_times[3]) + AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time + FROM + 
(SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'sign up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'play movie', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'buy', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND 
toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))) + WHERE step_0 = 1 + UNION ALL SELECT *, + arraySort([latest_0,latest_1,latest_2]) as event_times, + arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , + arraySort([latest_0,latest_1,latest_2]) as conversion_times, + if(isNotNull(conversion_times[2]) + AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, + if(isNotNull(conversion_times[3]) + AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'play movie', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'buy', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'sign up', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + 
argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))) + WHERE step_0 = 1 + UNION ALL SELECT *, + arraySort([latest_0,latest_1,latest_2]) as event_times, + arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , + arraySort([latest_0,latest_1,latest_2]) as conversion_times, + if(isNotNull(conversion_times[2]) + AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, + if(isNotNull(conversion_times[3]) + AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + 
prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'buy', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'sign up', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'play movie', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps, + prop + HAVING steps = max_steps) + WHERE steps IN [1, 2, 3] + AND arrayFlatten(array(prop)) = 
arrayFlatten(array('technology')) + ORDER BY aggregation_target + LIMIT 100 + OFFSET 0 SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.14 + ''' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.15 + ''' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.16 + ''' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE 
team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.17 + ''' + + SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner, + prop + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target, + prop) as max_steps, + step_1_conversion_time, + step_2_conversion_time, + prop + FROM + (SELECT *, + arraySort([latest_0,latest_1,latest_2]) as event_times, + arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , + arraySort([latest_0,latest_1,latest_2]) as conversion_times, + if(isNotNull(conversion_times[2]) + AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, + if(isNotNull(conversion_times[3]) + AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + 
min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'sign up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'play movie', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'buy', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))) + WHERE step_0 = 1 + UNION ALL SELECT *, + arraySort([latest_0,latest_1,latest_2]) as event_times, + 
arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , + arraySort([latest_0,latest_1,latest_2]) as conversion_times, + if(isNotNull(conversion_times[2]) + AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, + if(isNotNull(conversion_times[3]) + AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'play movie', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'buy', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'sign up', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 
'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))) + WHERE step_0 = 1 + UNION ALL SELECT *, + arraySort([latest_0,latest_1,latest_2]) as event_times, + arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , + arraySort([latest_0,latest_1,latest_2]) as conversion_times, + if(isNotNull(conversion_times[2]) + AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, + if(isNotNull(conversion_times[3]) + AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, 
+ pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'buy', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'sign up', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'play movie', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps, + prop + HAVING steps = max_steps) + WHERE steps IN [2, 3] + AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) + ORDER BY aggregation_target + LIMIT 100 + OFFSET 0 SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.2 + ''' + + SELECT 
replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.3 + ''' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.4 + ''' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND 
toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.5 + ''' + + SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner, + prop + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target, + prop) as max_steps, + step_1_conversion_time, + step_2_conversion_time, + prop + FROM + (SELECT *, + arraySort([latest_0,latest_1,latest_2]) as event_times, + arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , + arraySort([latest_0,latest_1,latest_2]) as conversion_times, + if(isNotNull(conversion_times[2]) + AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, + if(isNotNull(conversion_times[3]) + AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT 
e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'sign up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'play movie', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'buy', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))) + WHERE step_0 = 1 + UNION ALL SELECT *, + arraySort([latest_0,latest_1,latest_2]) as event_times, + arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , + arraySort([latest_0,latest_1,latest_2]) as conversion_times, + if(isNotNull(conversion_times[2]) + AND 
conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, + if(isNotNull(conversion_times[3]) + AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'play movie', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'buy', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'sign up', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + 
argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))) + WHERE step_0 = 1 + UNION ALL SELECT *, + arraySort([latest_0,latest_1,latest_2]) as event_times, + arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , + arraySort([latest_0,latest_1,latest_2]) as conversion_times, + if(isNotNull(conversion_times[2]) + AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, + if(isNotNull(conversion_times[3]) + AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'buy', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'sign up', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'play movie', 1, 0) 
as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps, + prop + HAVING steps = max_steps) + WHERE steps IN [1, 2, 3] + AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) + ORDER BY aggregation_target + LIMIT 100 + OFFSET 0 SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.6 + ''' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index 
= 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.7 + ''' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.8 + ''' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.9 + ''' + + SELECT aggregation_target AS actor_id 
+ FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner, + prop + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target, + prop) as max_steps, + step_1_conversion_time, + step_2_conversion_time, + prop + FROM + (SELECT *, + arraySort([latest_0,latest_1,latest_2]) as event_times, + arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , + arraySort([latest_0,latest_1,latest_2]) as conversion_times, + if(isNotNull(conversion_times[2]) + AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, + if(isNotNull(conversion_times[3]) + AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'sign up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'play movie', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + 
if(event = 'buy', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))) + WHERE step_0 = 1 + UNION ALL SELECT *, + arraySort([latest_0,latest_1,latest_2]) as event_times, + arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , + arraySort([latest_0,latest_1,latest_2]) as conversion_times, + if(isNotNull(conversion_times[2]) + AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, + if(isNotNull(conversion_times[3]) + AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', 
conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'play movie', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'buy', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'sign up', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND 
toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))) + WHERE step_0 = 1 + UNION ALL SELECT *, + arraySort([latest_0,latest_1,latest_2]) as event_times, + arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , + arraySort([latest_0,latest_1,latest_2]) as conversion_times, + if(isNotNull(conversion_times[2]) + AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, + if(isNotNull(conversion_times[3]) + AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'buy', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'sign up', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'play movie', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as 
prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps, + prop + HAVING steps = max_steps) + WHERE steps IN [2, 3] + AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) + ORDER BY aggregation_target + LIMIT 100 + OFFSET 0 SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000 + ''' +# --- diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index 39359a906656b..4d00b0a265b98 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -51,29 +51,6 @@ def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None): return [val["id"] for val in serialized_result] - def _create_groups(self): - GroupTypeMapping.objects.create(team=self.team, group_type="organization", group_type_index=0) - 
GroupTypeMapping.objects.create(team=self.team, group_type="company", group_type_index=1) - - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:5", - properties={"industry": "finance"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:6", - properties={"industry": "technology"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="org:5", - properties={"industry": "random"}, - ) - def _assert_funnel_breakdown_result_is_correct(self, result, steps: List[FunnelStepResult]): def funnel_result(step: FunnelStepResult, order: int) -> Dict[str, Any]: return { @@ -2681,6 +2658,73 @@ def test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step(self): self.assertCountEqual([res[0]["breakdown"] for res in results], [["Mac"], ["Safari"]]) + return TestFunnelBreakdown + + +def funnel_breakdown_group_test_factory(FunnelPerson): + funnel_order_type = FunnelOrderType.ORDERED + + class TestFunnelBreakdownGroup(APIBaseTest): + def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None): + filter = Filter(data=filter, team=self.team) + person_filter = filter.shallow_clone({"funnel_step": funnel_step, "funnel_step_breakdown": breakdown_value}) + _, serialized_result, _ = FunnelPerson(person_filter, self.team).get_actors() + + return [val["id"] for val in serialized_result] + + def _create_groups(self): + GroupTypeMapping.objects.create(team=self.team, group_type="organization", group_type_index=0) + GroupTypeMapping.objects.create(team=self.team, group_type="company", group_type_index=1) + + create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="org:5", + properties={"industry": "finance"}, + ) + create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="org:6", + properties={"industry": "technology"}, + ) + create_group( + team_id=self.team.pk, + group_type_index=1, + group_key="org:5", + properties={"industry": "random"}, + ) + + def 
_assert_funnel_breakdown_result_is_correct(self, result, steps: List[FunnelStepResult]): + def funnel_result(step: FunnelStepResult, order: int) -> Dict[str, Any]: + return { + "action_id": step.name if step.type == "events" else step.action_id, + "name": step.name, + "custom_name": None, + "order": order, + "people": [], + "count": step.count, + "type": step.type, + "average_conversion_time": step.average_conversion_time, + "median_conversion_time": step.median_conversion_time, + "breakdown": step.breakdown, + "breakdown_value": step.breakdown, + **( + { + "action_id": None, + "name": f"Completed {order+1} step{'s' if order > 0 else ''}", + } + if funnel_order_type == FunnelOrderType.UNORDERED + else {} + ), + } + + step_results = [] + for index, step_result in enumerate(steps): + step_results.append(funnel_result(step_result, index)) + + assert_funnel_results_equal(result, step_results) + @snapshot_clickhouse_queries def test_funnel_breakdown_group(self): self._create_groups() @@ -3020,7 +3064,7 @@ def test_funnel_aggregate_by_groups_breakdown_group_person_on_events(self): ], ) - return TestFunnelBreakdown + return TestFunnelBreakdownGroup def sort_breakdown_funnel_results(results: List[Dict[int, Any]]): diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel.py b/posthog/hogql_queries/insights/funnels/test/test_funnel.py index 8c374eacfdb2d..119afafbf4f71 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel.py @@ -32,8 +32,9 @@ funnel_conversion_time_test_factory, ) from posthog.hogql_queries.insights.funnels.test.breakdown_cases import ( - assert_funnel_results_equal, funnel_breakdown_test_factory, + funnel_breakdown_group_test_factory, + assert_funnel_results_equal, ) from posthog.hogql_queries.insights.funnels import Funnel from posthog.test.test_journeys import journeys_for @@ -61,6 +62,15 @@ class TestFunnelBreakdown( pass +class TestFunnelGroupBreakdown( + 
ClickhouseTestMixin, + funnel_breakdown_group_test_factory( # type: ignore + ClickhouseFunnelActors, + ), +): + pass + + class TestFunnelConversionTime( ClickhouseTestMixin, funnel_conversion_time_test_factory(FunnelOrderType.ORDERED, ClickhouseFunnelActors), # type: ignore diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_strict.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_strict.py index 9038576a9ebfd..a673eb0cddb62 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel_strict.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_strict.py @@ -8,8 +8,9 @@ ) from posthog.hogql_queries.insights.funnels.test.breakdown_cases import ( - assert_funnel_results_equal, funnel_breakdown_test_factory, + funnel_breakdown_group_test_factory, + assert_funnel_results_equal, ) from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query from posthog.models.action import Action @@ -179,6 +180,15 @@ def test_strict_breakdown_events_with_multiple_properties(self): self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, ["Safari"]), [people["person2"].uuid]) +class TestStrictFunnelGroupBreakdown( + ClickhouseTestMixin, + funnel_breakdown_group_test_factory( # type: ignore + ClickhouseFunnelStrictActors, + ), +): + pass + + class TestFunnelStrictStepsConversionTime( ClickhouseTestMixin, funnel_conversion_time_test_factory(FunnelOrderType.ORDERED, ClickhouseFunnelStrictActors), # type: ignore diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py index ae72ba3ab37b3..36e5d87f39e49 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py @@ -1,4 +1,4 @@ -# from datetime import datetime +from datetime import datetime from typing import cast from rest_framework.exceptions import ValidationError @@ -7,8 
+7,8 @@ from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query -# from posthog.models.action import Action -# from posthog.models.action_step import ActionStep +from posthog.models.action import Action +from posthog.models.action_step import ActionStep from posthog.models.filters import Filter from posthog.models.property_definition import PropertyDefinition from posthog.queries.funnels.funnel_unordered_persons import ( @@ -19,619 +19,630 @@ ) from posthog.schema import FunnelsQuery -# from posthog.hogql_queries.insights.funnels.test.breakdown_cases import ( -# assert_funnel_results_equal, -# funnel_breakdown_test_factory, -# ) +from posthog.hogql_queries.insights.funnels.test.breakdown_cases import ( + FunnelStepResult, + funnel_breakdown_test_factory, + funnel_breakdown_group_test_factory, + assert_funnel_results_equal, +) from posthog.test.base import ( APIBaseTest, ClickhouseTestMixin, _create_event, _create_person, - # snapshot_clickhouse_queries, + snapshot_clickhouse_queries, ) -# from posthog.test.test_journeys import journeys_for +from posthog.test.test_journeys import journeys_for FORMAT_TIME = "%Y-%m-%d 00:00:00" -# def _create_action(**kwargs): -# team = kwargs.pop("team") -# name = kwargs.pop("name") -# properties = kwargs.pop("properties", {}) -# action = Action.objects.create(team=team, name=name) -# ActionStep.objects.create(action=action, event=name, properties=properties) -# return action - - -# class TestFunnelUnorderedStepsBreakdown( -# ClickhouseTestMixin, -# funnel_breakdown_test_factory( # type: ignore -# FunnelUnordered, -# ClickhouseFunnelUnorderedActors, -# _create_event, -# _create_action, -# _create_person, -# ), -# ): -# maxDiff = None - -# def test_funnel_step_breakdown_event_single_person_events_with_multiple_properties(self): -# # overriden from factory - -# filters = { -# "events": [{"id": "sign up", "order": 
0}, {"id": "play movie", "order": 1}], -# "insight": INSIGHT_FUNNELS, -# "date_from": "2020-01-01", -# "date_to": "2020-01-08", -# "funnel_window_days": 7, -# "breakdown_type": "event", -# "breakdown": "$browser", -# "breakdown_attribution_type": "all_events", -# } - -# # event -# person1 = _create_person(distinct_ids=["person1"], team_id=self.team.pk) -# _create_event( -# team=self.team, -# event="sign up", -# distinct_id="person1", -# properties={"key": "val", "$browser": "Chrome"}, -# timestamp="2020-01-01T12:00:00Z", -# ) -# _create_event( -# team=self.team, -# event="sign up", -# distinct_id="person1", -# properties={"key": "val", "$browser": "Safari"}, -# timestamp="2020-01-02T13:00:00Z", -# ) -# _create_event( -# team=self.team, -# event="play movie", -# distinct_id="person1", -# properties={"key": "val", "$browser": "Safari"}, -# timestamp="2020-01-02T14:00:00Z", -# ) - -# query = cast(FunnelsQuery, filter_to_query(filters)) -# results = FunnelsQueryRunner(query=query, team=self.team).calculate().results - -# assert_funnel_results_equal( -# results[0], -# [ -# { -# "action_id": None, -# "name": "Completed 1 step", -# "custom_name": None, -# "order": 0, -# "people": [], -# "count": 1, -# "type": "events", -# "average_conversion_time": None, -# "median_conversion_time": None, -# "breakdown": ["Chrome"], -# "breakdown_value": ["Chrome"], -# }, -# { -# "action_id": None, -# "name": "Completed 2 steps", -# "custom_name": None, -# "order": 1, -# "people": [], -# "count": 0, -# "type": "events", -# "average_conversion_time": None, -# "median_conversion_time": None, -# "breakdown": ["Chrome"], -# "breakdown_value": ["Chrome"], -# }, -# ], -# ) -# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ["Chrome"]), [person1.uuid]) -# self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, ["Chrome"]), []) - -# assert_funnel_results_equal( -# results[1], -# [ -# { -# "action_id": None, -# "name": "Completed 1 step", -# "custom_name": None, -# "order": 0, 
-# "people": [], -# "count": 1, -# "type": "events", -# "average_conversion_time": None, -# "median_conversion_time": None, -# "breakdown": ["Safari"], -# "breakdown_value": ["Safari"], -# }, -# { -# "action_id": None, -# "name": "Completed 2 steps", -# "custom_name": None, -# "order": 1, -# "people": [], -# "count": 1, -# "type": "events", -# "average_conversion_time": 3600, -# "median_conversion_time": 3600, -# "breakdown": ["Safari"], -# "breakdown_value": ["Safari"], -# }, -# ], -# ) -# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ["Safari"]), [person1.uuid]) -# self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, ["Safari"]), [person1.uuid]) - -# def test_funnel_step_breakdown_with_step_attribution(self): -# # overridden from factory, since with no order, step one is step zero, and vice versa - -# filters = { -# "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], -# "insight": INSIGHT_FUNNELS, -# "date_from": "2020-01-01", -# "date_to": "2020-01-08", -# "funnel_window_days": 7, -# "breakdown_type": "event", -# "breakdown": ["$browser"], -# "breakdown_attribution_type": "step", -# "breakdown_attribution_value": "0", -# "funnel_order_type": "unordered", -# } - -# # event -# events_by_person = { -# "person1": [ -# { -# "event": "sign up", -# "timestamp": datetime(2020, 1, 1, 12), -# "properties": {"$browser": "Chrome"}, -# }, -# {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, -# ], -# "person2": [ -# {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, -# { -# "event": "buy", -# "timestamp": datetime(2020, 1, 2, 13), -# "properties": {"$browser": "Safari"}, -# }, -# ], -# "person3": [ -# { -# "event": "sign up", -# "timestamp": datetime(2020, 1, 2, 14), -# "properties": {"$browser": "Mac"}, -# }, -# {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, -# ], -# "person4": [ -# { -# "event": "sign up", -# "timestamp": datetime(2020, 1, 2, 15), -# "properties": {"$browser": 0}, -# }, -# # step 
attribution means alakazam is valid when step = 1 -# { -# "event": "buy", -# "timestamp": datetime(2020, 1, 2, 16), -# "properties": {"$browser": "alakazam"}, -# }, -# ], -# } -# people = journeys_for(events_by_person, self.team) - -# query = cast(FunnelsQuery, filter_to_query(filters)) -# results = FunnelsQueryRunner(query=query, team=self.team).calculate().results -# results = sorted(results, key=lambda res: res[0]["breakdown"]) - -# self.assertEqual(len(results), 6) - -# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Mac"), [people["person3"].uuid]) - -# def test_funnel_step_breakdown_with_step_one_attribution(self): -# # overridden from factory, since with no order, step one is step zero, and vice versa -# filters = { -# "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], -# "insight": INSIGHT_FUNNELS, -# "date_from": "2020-01-01", -# "date_to": "2020-01-08", -# "funnel_window_days": 7, -# "breakdown_type": "event", -# "breakdown": ["$browser"], -# "breakdown_attribution_type": "step", -# "breakdown_attribution_value": "1", -# "funnel_order_type": "unordered", -# } - -# # event -# events_by_person = { -# "person1": [ -# { -# "event": "sign up", -# "timestamp": datetime(2020, 1, 1, 12), -# "properties": {"$browser": "Chrome"}, -# }, -# {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, -# ], -# "person2": [ -# {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, -# { -# "event": "buy", -# "timestamp": datetime(2020, 1, 2, 13), -# "properties": {"$browser": "Safari"}, -# }, -# ], -# "person3": [ -# { -# "event": "sign up", -# "timestamp": datetime(2020, 1, 2, 14), -# "properties": {"$browser": "Mac"}, -# }, -# {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, -# ], -# "person4": [ -# { -# "event": "sign up", -# "timestamp": datetime(2020, 1, 2, 15), -# "properties": {"$browser": 0}, -# }, -# # step attribution means alakazam is valid when step = 1 -# { -# "event": "buy", -# "timestamp": datetime(2020, 1, 2, 
16), -# "properties": {"$browser": "alakazam"}, -# }, -# ], -# } -# people = journeys_for(events_by_person, self.team) - -# query = cast(FunnelsQuery, filter_to_query(filters)) -# results = FunnelsQueryRunner(query=query, team=self.team).calculate().results -# results = sorted(results, key=lambda res: res[0]["breakdown"]) - -# self.assertEqual(len(results), 6) -# # unordered, so everything is step one too. - -# self._assert_funnel_breakdown_result_is_correct( -# results[0], -# [ -# FunnelStepResult(name="Completed 1 step", breakdown=[""], count=3), -# FunnelStepResult( -# name="Completed 2 steps", -# breakdown=[""], -# count=2, -# average_conversion_time=3600, -# median_conversion_time=3600, -# ), -# ], -# ) - -# self.assertCountEqual( -# self._get_actor_ids_at_step(filters, 1, ""), -# [people["person1"].uuid, people["person2"].uuid, people["person3"].uuid], -# ) -# self.assertCountEqual( -# self._get_actor_ids_at_step(filters, 2, ""), -# [people["person1"].uuid, people["person3"].uuid], -# ) - -# self._assert_funnel_breakdown_result_is_correct( -# results[1], -# [ -# FunnelStepResult(name="Completed 1 step", breakdown=["0"], count=1), -# FunnelStepResult(name="Completed 2 steps", breakdown=["0"], count=0), -# ], -# ) - -# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person4"].uuid]) - -# def test_funnel_step_breakdown_with_step_one_attribution_incomplete_funnel(self): -# # overridden from factory, since with no order, step one is step zero, and vice versa - -# filters = { -# "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], -# "insight": INSIGHT_FUNNELS, -# "date_from": "2020-01-01", -# "date_to": "2020-01-08", -# "funnel_window_days": 7, -# "breakdown_type": "event", -# "breakdown": ["$browser"], -# "breakdown_attribution_type": "step", -# "breakdown_attribution_value": "1", -# "funnel_order_type": "unordered", -# } - -# # event -# events_by_person = { -# "person1": [ -# { -# "event": "sign up", -# "timestamp": 
datetime(2020, 1, 1, 12), -# "properties": {"$browser": "Chrome"}, -# }, -# {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, -# ], -# "person2": [ -# {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, -# # {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}} -# ], -# "person3": [ -# { -# "event": "sign up", -# "timestamp": datetime(2020, 1, 2, 14), -# "properties": {"$browser": "Mac"}, -# }, -# # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)} -# ], -# "person4": [ -# { -# "event": "sign up", -# "timestamp": datetime(2020, 1, 2, 15), -# "properties": {"$browser": 0}, -# }, -# # step attribution means alakazam is valid when step = 1 -# { -# "event": "buy", -# "timestamp": datetime(2020, 1, 2, 16), -# "properties": {"$browser": "alakazam"}, -# }, -# ], -# } -# people = journeys_for(events_by_person, self.team) - -# query = cast(FunnelsQuery, filter_to_query(filters)) -# results = FunnelsQueryRunner(query=query, team=self.team).calculate().results -# results = sorted(results, key=lambda res: res[0]["breakdown"]) - -# # Breakdown by step_1 means funnel items that never reach step_1 are NULLed out -# self.assertEqual(len(results), 4) -# # Chrome and Mac and Safari goes away - -# self._assert_funnel_breakdown_result_is_correct( -# results[0], -# [ -# FunnelStepResult(name="Completed 1 step", breakdown=[""], count=1), -# FunnelStepResult( -# name="Completed 2 steps", -# breakdown=[""], -# count=1, -# average_conversion_time=3600, -# median_conversion_time=3600, -# ), -# ], -# ) - -# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ""), [people["person1"].uuid]) - -# self._assert_funnel_breakdown_result_is_correct( -# results[1], -# [ -# FunnelStepResult(name="Completed 1 step", breakdown=["0"], count=1), -# FunnelStepResult(name="Completed 2 steps", breakdown=["0"], count=0), -# ], -# ) - -# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person4"].uuid]) - -# 
self._assert_funnel_breakdown_result_is_correct( -# results[2], -# [ -# FunnelStepResult(name="Completed 1 step", breakdown=["Chrome"], count=1), -# FunnelStepResult(name="Completed 2 steps", breakdown=["Chrome"], count=0), -# ], -# ) - -# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Chrome"), [people["person1"].uuid]) - -# self._assert_funnel_breakdown_result_is_correct( -# results[3], -# [ -# FunnelStepResult(name="Completed 1 step", breakdown=["alakazam"], count=1), -# FunnelStepResult( -# name="Completed 2 steps", -# breakdown=["alakazam"], -# count=1, -# average_conversion_time=3600, -# median_conversion_time=3600, -# ), -# ], -# ) - -# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "alakazam"), [people["person4"].uuid]) - -# def test_funnel_step_non_array_breakdown_with_step_one_attribution_incomplete_funnel(self): -# # overridden from factory, since with no order, step one is step zero, and vice versa - -# filters = { -# "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], -# "insight": INSIGHT_FUNNELS, -# "date_from": "2020-01-01", -# "date_to": "2020-01-08", -# "funnel_window_days": 7, -# "breakdown_type": "event", -# "breakdown": "$browser", -# "breakdown_attribution_type": "step", -# "breakdown_attribution_value": "1", -# "funnel_order_type": "unordered", -# } - -# # event -# events_by_person = { -# "person1": [ -# { -# "event": "sign up", -# "timestamp": datetime(2020, 1, 1, 12), -# "properties": {"$browser": "Chrome"}, -# }, -# {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, -# ], -# "person2": [ -# {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, -# # {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}} -# ], -# "person3": [ -# { -# "event": "sign up", -# "timestamp": datetime(2020, 1, 2, 14), -# "properties": {"$browser": "Mac"}, -# }, -# # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)} -# ], -# "person4": [ -# { -# "event": "sign 
up", -# "timestamp": datetime(2020, 1, 2, 15), -# "properties": {"$browser": 0}, -# }, -# # step attribution means alakazam is valid when step = 1 -# { -# "event": "buy", -# "timestamp": datetime(2020, 1, 2, 16), -# "properties": {"$browser": "alakazam"}, -# }, -# ], -# } -# people = journeys_for(events_by_person, self.team) - -# query = cast(FunnelsQuery, filter_to_query(filters)) -# results = FunnelsQueryRunner(query=query, team=self.team).calculate().results -# results = sorted(results, key=lambda res: res[0]["breakdown"]) - -# # Breakdown by step_1 means funnel items that never reach step_1 are NULLed out -# self.assertEqual(len(results), 4) -# # Chrome and Mac and Safari goes away - -# self._assert_funnel_breakdown_result_is_correct( -# results[0], -# [ -# FunnelStepResult(name="Completed 1 step", breakdown=[""], count=1), -# FunnelStepResult( -# name="Completed 2 steps", -# breakdown=[""], -# count=1, -# average_conversion_time=3600, -# median_conversion_time=3600, -# ), -# ], -# ) - -# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ""), [people["person1"].uuid]) - -# self._assert_funnel_breakdown_result_is_correct( -# results[1], -# [ -# FunnelStepResult(name="Completed 1 step", breakdown=["0"], count=1), -# FunnelStepResult(name="Completed 2 steps", breakdown=["0"], count=0), -# ], -# ) - -# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person4"].uuid]) - -# self._assert_funnel_breakdown_result_is_correct( -# results[2], -# [ -# FunnelStepResult(name="Completed 1 step", breakdown=["Chrome"], count=1), -# FunnelStepResult(name="Completed 2 steps", breakdown=["Chrome"], count=0), -# ], -# ) - -# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Chrome"), [people["person1"].uuid]) - -# self._assert_funnel_breakdown_result_is_correct( -# results[3], -# [ -# FunnelStepResult(name="Completed 1 step", breakdown=["alakazam"], count=1), -# FunnelStepResult( -# name="Completed 2 steps", -# 
breakdown=["alakazam"], -# count=1, -# average_conversion_time=3600, -# median_conversion_time=3600, -# ), -# ], -# ) - -# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "alakazam"), [people["person4"].uuid]) - -# @snapshot_clickhouse_queries -# def test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step(self): -# # No person querying here, so snapshots are more legible -# # overridden from factory, since we need to add `funnel_order_type` - -# filters = { -# "events": [ -# {"id": "sign up", "order": 0}, -# { -# "id": "buy", -# "properties": [{"type": "event", "key": "$version", "value": "xyz"}], -# "order": 1, -# }, -# ], -# "insight": INSIGHT_FUNNELS, -# "date_from": "2020-01-01", -# "date_to": "2020-01-08", -# "funnel_window_days": 7, -# "breakdown_type": "event", -# "breakdown": "$browser", -# "breakdown_attribution_type": "step", -# "breakdown_attribution_value": "1", -# "funnel_order_type": "unordered", -# } - -# # event -# events_by_person = { -# "person1": [ -# { -# "event": "sign up", -# "timestamp": datetime(2020, 1, 1, 12), -# "properties": {"$browser": "Chrome", "$version": "xyz"}, -# }, -# { -# "event": "buy", -# "timestamp": datetime(2020, 1, 1, 13), -# "properties": {"$browser": "Chrome"}, -# }, -# # discarded because doesn't meet criteria -# ], -# "person2": [ -# {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, -# { -# "event": "buy", -# "timestamp": datetime(2020, 1, 2, 13), -# "properties": {"$browser": "Safari", "$version": "xyz"}, -# }, -# ], -# "person3": [ -# { -# "event": "sign up", -# "timestamp": datetime(2020, 1, 2, 14), -# "properties": {"$browser": "Mac"}, -# }, -# { -# "event": "buy", -# "timestamp": datetime(2020, 1, 2, 15), -# "properties": {"$version": "xyz", "$browser": "Mac"}, -# }, -# ], -# # no properties dude, doesn't make it to step 1, and since breakdown on step 1, is discarded completely -# "person5": [ -# {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, -# {"event": "buy", 
"timestamp": datetime(2020, 1, 2, 16)}, -# ], -# } -# journeys_for(events_by_person, self.team) - -# query = cast(FunnelsQuery, filter_to_query(filters)) -# results = FunnelsQueryRunner(query=query, team=self.team).calculate().results -# results = sorted(results, key=lambda res: res[0]["breakdown"]) - -# self.assertEqual(len(results), 3) - -# self.assertCountEqual([res[0]["breakdown"] for res in results], [[""], ["Mac"], ["Safari"]]) +def _create_action(**kwargs): + team = kwargs.pop("team") + name = kwargs.pop("name") + properties = kwargs.pop("properties", {}) + action = Action.objects.create(team=team, name=name) + ActionStep.objects.create(action=action, event=name, properties=properties) + return action + + +class TestFunnelUnorderedStepsBreakdown( + ClickhouseTestMixin, + funnel_breakdown_test_factory( # type: ignore + FunnelOrderType.UNORDERED, + ClickhouseFunnelUnorderedActors, + _create_action, + _create_person, + ), +): + maxDiff = None + + def test_funnel_step_breakdown_event_single_person_events_with_multiple_properties(self): + # overriden from factory + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": "unordered", + "events": [{"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": "$browser", + "breakdown_attribution_type": "all_events", + } + + # event + person1 = _create_person(distinct_ids=["person1"], team_id=self.team.pk) + _create_event( + team=self.team, + event="sign up", + distinct_id="person1", + properties={"key": "val", "$browser": "Chrome"}, + timestamp="2020-01-01T12:00:00Z", + ) + _create_event( + team=self.team, + event="sign up", + distinct_id="person1", + properties={"key": "val", "$browser": "Safari"}, + timestamp="2020-01-02T13:00:00Z", + ) + _create_event( + team=self.team, + event="play movie", + distinct_id="person1", + properties={"key": "val", "$browser": "Safari"}, + 
timestamp="2020-01-02T14:00:00Z", + ) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + assert_funnel_results_equal( + results[0], + [ + { + "action_id": None, + "name": "Completed 1 step", + "custom_name": None, + "order": 0, + "people": [], + "count": 1, + "type": "events", + "average_conversion_time": None, + "median_conversion_time": None, + "breakdown": ["Chrome"], + "breakdown_value": ["Chrome"], + }, + { + "action_id": None, + "name": "Completed 2 steps", + "custom_name": None, + "order": 1, + "people": [], + "count": 0, + "type": "events", + "average_conversion_time": None, + "median_conversion_time": None, + "breakdown": ["Chrome"], + "breakdown_value": ["Chrome"], + }, + ], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ["Chrome"]), [person1.uuid]) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, ["Chrome"]), []) + + assert_funnel_results_equal( + results[1], + [ + { + "action_id": None, + "name": "Completed 1 step", + "custom_name": None, + "order": 0, + "people": [], + "count": 1, + "type": "events", + "average_conversion_time": None, + "median_conversion_time": None, + "breakdown": ["Safari"], + "breakdown_value": ["Safari"], + }, + { + "action_id": None, + "name": "Completed 2 steps", + "custom_name": None, + "order": 1, + "people": [], + "count": 1, + "type": "events", + "average_conversion_time": 3600, + "median_conversion_time": 3600, + "breakdown": ["Safari"], + "breakdown_value": ["Safari"], + }, + ], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ["Safari"]), [person1.uuid]) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, ["Safari"]), [person1.uuid]) + + def test_funnel_step_breakdown_with_step_attribution(self): + # overridden from factory, since with no order, step one is step zero, and vice versa + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": "unordered", + 
"events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_attribution_type": "step", + "breakdown_attribution_value": "0", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0}, + }, + # step attribution means alakazam is valid when step = 1 + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "alakazam"}, + }, + ], + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + self.assertEqual(len(results), 6) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Mac"), [people["person3"].uuid]) + + def test_funnel_step_breakdown_with_step_one_attribution(self): + # overridden from factory, since with no order, step one is step zero, and vice versa + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": "unordered", + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": 
"event", + "breakdown": ["$browser"], + "breakdown_attribution_type": "step", + "breakdown_attribution_value": "1", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0}, + }, + # step attribution means alakazam is valid when step = 1 + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "alakazam"}, + }, + ], + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + self.assertEqual(len(results), 6) + # unordered, so everything is step one too. 
+ + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="Completed 1 step", breakdown=[""], count=3), + FunnelStepResult( + name="Completed 2 steps", + breakdown=[""], + count=2, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ""), + [people["person1"].uuid, people["person2"].uuid, people["person3"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, ""), + [people["person1"].uuid, people["person3"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="Completed 1 step", breakdown=["0"], count=1), + FunnelStepResult(name="Completed 2 steps", breakdown=["0"], count=0), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person4"].uuid]) + + def test_funnel_step_breakdown_with_step_one_attribution_incomplete_funnel(self): + # overridden from factory, since with no order, step one is step zero, and vice versa + + filters = { + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_attribution_type": "step", + "breakdown_attribution_value": "1", + "funnel_order_type": "unordered", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}} + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + # {"event": 
"buy", "timestamp": datetime(2020, 1, 2, 15)} + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0}, + }, + # step attribution means alakazam is valid when step = 1 + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "alakazam"}, + }, + ], + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + # Breakdown by step_1 means funnel items that never reach step_1 are NULLed out + self.assertEqual(len(results), 4) + # Chrome and Mac and Safari goes away + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="Completed 1 step", breakdown=[""], count=1), + FunnelStepResult( + name="Completed 2 steps", + breakdown=[""], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ""), [people["person1"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="Completed 1 step", breakdown=["0"], count=1), + FunnelStepResult(name="Completed 2 steps", breakdown=["0"], count=0), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person4"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[2], + [ + FunnelStepResult(name="Completed 1 step", breakdown=["Chrome"], count=1), + FunnelStepResult(name="Completed 2 steps", breakdown=["Chrome"], count=0), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Chrome"), [people["person1"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[3], + [ + FunnelStepResult(name="Completed 1 step", breakdown=["alakazam"], count=1), + FunnelStepResult( + name="Completed 
2 steps", + breakdown=["alakazam"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "alakazam"), [people["person4"].uuid]) + + def test_funnel_step_non_array_breakdown_with_step_one_attribution_incomplete_funnel(self): + # overridden from factory, since with no order, step one is step zero, and vice versa + + filters = { + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": "$browser", + "breakdown_attribution_type": "step", + "breakdown_attribution_value": "1", + "funnel_order_type": "unordered", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}} + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)} + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0}, + }, + # step attribution means alakazam is valid when step = 1 + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "alakazam"}, + }, + ], + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + # Breakdown by step_1 means funnel items that never reach 
step_1 are NULLed out + self.assertEqual(len(results), 4) + # Mac and Safari go away; Chrome is still returned (see results[2] below) + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="Completed 1 step", breakdown=[""], count=1), + FunnelStepResult( + name="Completed 2 steps", + breakdown=[""], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ""), [people["person1"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="Completed 1 step", breakdown=["0"], count=1), + FunnelStepResult(name="Completed 2 steps", breakdown=["0"], count=0), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person4"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[2], + [ + FunnelStepResult(name="Completed 1 step", breakdown=["Chrome"], count=1), + FunnelStepResult(name="Completed 2 steps", breakdown=["Chrome"], count=0), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Chrome"), [people["person1"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[3], + [ + FunnelStepResult(name="Completed 1 step", breakdown=["alakazam"], count=1), + FunnelStepResult( + name="Completed 2 steps", + breakdown=["alakazam"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "alakazam"), [people["person4"].uuid]) + + @snapshot_clickhouse_queries + def test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step(self): + # No person querying here, so snapshots are more legible + # overridden from factory, since we need to add `funnel_order_type` + + filters = { + "events": [ + {"id": "sign up", "order": 0}, + { + "id": "buy", + "properties": [{"type": "event", "key": "$version", "value": "xyz"}], + "order": 1, + }, + ], + "insight":
INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": "$browser", + "breakdown_attribution_type": "step", + "breakdown_attribution_value": "1", + "funnel_order_type": "unordered", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome", "$version": "xyz"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"$browser": "Chrome"}, + }, + # discarded because doesn't meet criteria + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari", "$version": "xyz"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$version": "xyz", "$browser": "Mac"}, + }, + ], + # no properties dude, doesn't make it to step 1, and since breakdown on step 1, is discarded completely + "person5": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + ], + } + journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + self.assertEqual(len(results), 3) + + self.assertCountEqual([res[0]["breakdown"] for res in results], [[""], ["Mac"], ["Safari"]]) + + +class TestUnorderedFunnelGroupBreakdown( + ClickhouseTestMixin, + funnel_breakdown_group_test_factory( # type: ignore + ClickhouseFunnelUnorderedActors, + ), +): + pass class TestFunnelUnorderedStepsConversionTime(