diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index e01491ba20029..4c3ab9d949e19 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -98,9 +98,9 @@ # --- # name: TestFOSSFunnel.test_funnel_conversion_window_seconds.1 ''' - SELECT persons.id, - persons.id AS id, - persons.created_at AS created_at, + SELECT filterable_persons.id, + filterable_persons.id AS id, + filterable_persons.created_at AS created_at, 1 FROM (SELECT aggregation_target AS actor_id @@ -183,15 +183,50 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at, person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target AS aggregation_target, steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, step_2_conversion_time AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, latest_2 AS latest_2, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalSecond(15))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalSecond(15))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalSecond(15))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalSecond(15))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalSecond(15))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, if(equals(e.event, 'step one'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(equals(e.event, 'step two'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(equals(e.event, 'step three'), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2 + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [2, 3]), 0) + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.created_at DESC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.created_at DESC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -507,9 +542,9 @@ # --- # name: TestFOSSFunnel.test_funnel_with_property_groups.1 ''' - SELECT persons.id, - persons.id AS id, - persons.created_at AS created_at, + SELECT filterable_persons.id, + filterable_persons.id AS id, + filterable_persons.created_at AS created_at, 1 FROM (SELECT aggregation_target AS actor_id @@ -604,15 +639,59 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at, person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target AS aggregation_target, steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, step_2_conversion_time AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, latest_2 AS latest_2, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(and(equals(e.event, '$pageview'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), 'aloha.com'), 0)), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(and(equals(e.event, '$pageview'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), 'aloha2.com'), 0)), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2 + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'age'), ''), 'null'), '^"|"$', '') AS properties___age + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-07-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'user signed up')), or(and(ifNull(ilike(e__pdi__person.properties___email, '%.com%'), 0), ifNull(equals(e__pdi__person.properties___age, '20'), 0)), or(ifNull(ilike(e__pdi__person.properties___email, '%.org%'), 0), ifNull(equals(e__pdi__person.properties___age, '28'), 0)))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2, 3]), 0) + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.created_at DESC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.created_at DESC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -626,9 +705,9 @@ # --- # name: TestFOSSFunnel.test_funnel_with_property_groups.2 ''' - SELECT persons.id, - persons.id AS id, - persons.created_at AS created_at, + SELECT filterable_persons.id, + filterable_persons.id AS id, + filterable_persons.created_at AS created_at, 1 FROM (SELECT aggregation_target AS actor_id @@ -723,15 +802,59 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at, person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target AS aggregation_target, steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, step_2_conversion_time AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, latest_2 AS latest_2, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(and(equals(e.event, '$pageview'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), 'aloha.com'), 0)), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(and(equals(e.event, '$pageview'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), 'aloha2.com'), 0)), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2 + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'age'), ''), 'null'), '^"|"$', '') AS properties___age + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-07-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'user signed up')), or(and(ifNull(ilike(e__pdi__person.properties___email, '%.com%'), 0), ifNull(equals(e__pdi__person.properties___age, '20'), 0)), or(ifNull(ilike(e__pdi__person.properties___email, '%.org%'), 0), ifNull(equals(e__pdi__person.properties___age, '28'), 0)))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [2, 3]), 0) + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.created_at DESC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.created_at DESC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -745,9 +868,9 @@ # --- # name: TestFOSSFunnel.test_funnel_with_property_groups.3 ''' - SELECT persons.id, - persons.id AS id, - persons.created_at AS created_at, + SELECT filterable_persons.id, + filterable_persons.id AS id, + filterable_persons.created_at AS created_at, 1 FROM (SELECT aggregation_target AS actor_id @@ -842,15 +965,59 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at, person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target AS aggregation_target, steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, step_2_conversion_time AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, latest_2 AS latest_2, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(and(equals(e.event, '$pageview'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), 'aloha.com'), 0)), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(and(equals(e.event, '$pageview'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), 'aloha2.com'), 0)), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2 + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'age'), ''), 'null'), '^"|"$', '') AS properties___age + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-07-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'user signed up')), or(and(ifNull(ilike(e__pdi__person.properties___email, '%.com%'), 0), ifNull(equals(e__pdi__person.properties___age, '20'), 0)), or(ifNull(ilike(e__pdi__person.properties___email, '%.org%'), 0), ifNull(equals(e__pdi__person.properties___age, '28'), 0)))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [3]), 0) + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.created_at DESC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.created_at DESC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr index 7124c534df351..ba4e250851da9 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr @@ -318,8 +318,8 @@ # --- # name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.1 ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT funnel_actors.actor_id AS actor_id, @@ -431,14 +431,60 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -467,8 +513,8 @@ # --- # name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.3 ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT funnel_actors.actor_id AS actor_id, @@ -580,14 +626,60 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -616,8 +708,8 @@ # --- # name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.5 ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT funnel_actors.actor_id AS actor_id, @@ -729,14 +821,60 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -765,8 +903,8 @@ # --- # name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.7 ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT funnel_actors.actor_id AS actor_id, @@ -878,14 +1016,60 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -1077,8 +1261,8 @@ # --- # name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties_materialized.1 ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT funnel_actors.actor_id AS actor_id, @@ -1190,14 +1374,60 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, nullIf(nullIf(person.`pmat_$browser`, ''), 'null') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -1226,8 +1456,8 @@ # --- # name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties_materialized.3 ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT funnel_actors.actor_id AS actor_id, @@ -1339,14 +1569,60 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, nullIf(nullIf(person.`pmat_$browser`, ''), 'null') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -1375,8 +1651,8 @@ # --- # name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties_materialized.5 ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT funnel_actors.actor_id AS actor_id, @@ -1488,14 +1764,60 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, nullIf(nullIf(person.`pmat_$browser`, ''), 'null') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -1524,8 +1846,8 @@ # --- # name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties_materialized.7 ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT funnel_actors.actor_id AS actor_id, @@ -1637,14 +1959,60 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, nullIf(nullIf(person.`pmat_$browser`, ''), 'null') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr index 2cd43d115573b..5e93052861bc9 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr @@ -1,8 +1,8 @@ # serializer version: 1 # name: TestFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT funnel_actors.actor_id AS actor_id, @@ -111,14 +111,58 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed']), equals(event.event, 'insight loaded'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events + FROM events AS event + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS event__pdi ON equals(event.distinct_id, event__pdi.distinct_id) + JOIN + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, '$pageview'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'insight analyzed'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed']), equals(event.event, 'insight loaded'), ifNull(equals(funnel_actors.steps, 2), 0)) + GROUP BY actor_id + ORDER BY actor_id ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -147,8 +191,8 @@ # --- # name: TestFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings.2 ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT funnel_actors.actor_id AS actor_id, @@ -323,14 +367,70 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed', 'insight updated']), equals(event.event, 'insight loaded'), ifNull(notEquals(funnel_actors.steps, 3), 1)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events + FROM events AS event + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS event__pdi ON equals(event.distinct_id, event__pdi.distinct_id) + JOIN + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(step_2_matching_event) AS step_2_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_2, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, step_2_conversion_time AS step_2_conversion_time, latest_0 AS latest_0, latest_2 AS latest_2, latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, latest_2 AS latest_2, uuid_2 AS uuid_2, `$session_id_2` AS `$session_id_2`, `$window_id_2` AS `$window_id_2`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2, if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`, if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, '$pageview'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, if(equals(e.event, 'insight updated'), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'insight analyzed', 'insight updated'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2, 3]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed', 'insight updated']), equals(event.event, 'insight loaded'), ifNull(notEquals(funnel_actors.steps, 3), 1)) + GROUP BY actor_id + ORDER BY actor_id ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -359,8 +459,8 @@ # --- # name: TestFunnelCorrelationsActors.test_funnel_correlation_on_properties_with_recordings ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT funnel_actors.actor_id AS actor_id, @@ -472,14 +572,60 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, '$pageview'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'insight analyzed')), ifNull(equals(e__pdi__person.properties___foo, 'bar'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -508,8 +654,8 @@ # --- # name: TestFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT funnel_actors.actor_id AS actor_id, @@ -621,14 +767,60 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, min(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1, min(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`, min(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, '$pageview'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__pdi__person.properties___foo, 'bar'), 0)))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -657,8 +849,8 @@ # --- # name: TestFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings.2 ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT funnel_actors.actor_id AS actor_id, @@ -770,14 +962,60 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, min(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1, min(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`, min(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, '$pageview'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__pdi__person.properties___foo, 'bar'), 0)))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_persons.ambr index abd09217bc9af..9bb5d369842e6 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_persons.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_persons.ambr @@ -1,8 +1,8 @@ # serializer version: 1 # name: TestFunnelPersons.test_funnel_person_recordings ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT aggregation_target AS actor_id, @@ -153,14 +153,58 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT aggregation_target AS actor_id, step_0_matching_events AS matching_events + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(step_2_matching_event) AS step_2_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, step_2_conversion_time AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, latest_2 AS latest_2, uuid_2 AS uuid_2, `$session_id_2` AS `$session_id_2`, `$window_id_2` AS `$window_id_2`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2, if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`, if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'step one'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'step two'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, if(equals(e.event, 'step three'), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2, 3]), 0) + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -189,8 +233,8 @@ # --- # name: TestFunnelPersons.test_funnel_person_recordings.2 ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT aggregation_target AS actor_id, @@ -341,14 +385,58 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT aggregation_target AS actor_id, step_1_matching_events AS matching_events + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(step_2_matching_event) AS step_2_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, step_2_conversion_time AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, latest_2 AS latest_2, uuid_2 AS uuid_2, `$session_id_2` AS `$session_id_2`, `$window_id_2` AS `$window_id_2`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2, if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`, if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'step one'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'step two'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, if(equals(e.event, 'step three'), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [2, 3]), 0) + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -377,8 +465,8 @@ # --- # name: TestFunnelPersons.test_funnel_person_recordings.4 ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT aggregation_target AS actor_id, @@ -529,14 +617,58 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(equals(steps, 2), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT aggregation_target AS actor_id, step_1_matching_events AS matching_events + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(step_2_matching_event) AS step_2_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, step_2_conversion_time AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, latest_2 AS latest_2, uuid_2 AS uuid_2, `$session_id_2` AS `$session_id_2`, `$window_id_2` AS `$window_id_2`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2, if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`, if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'step one'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'step two'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, if(equals(e.event, 'step three'), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(equals(steps, 2), 0) + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_persons.ambr index 7ff818de40361..cc7685ff37c47 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_persons.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_persons.ambr @@ -1,8 +1,8 @@ # serializer version: 1 # name: TestFunnelStrictStepsPersons.test_strict_funnel_person_recordings ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT aggregation_target AS actor_id, @@ -113,14 +113,50 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT aggregation_target AS actor_id, step_0_matching_events AS matching_events + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(step_2_matching_event) AS step_2_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, step_2_conversion_time AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, latest_2 AS latest_2, uuid_2 AS uuid_2, `$session_id_2` AS `$session_id_2`, `$window_id_2` AS `$window_id_2`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, min(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1, min(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`, min(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) AS latest_2, min(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) AS uuid_2, min(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) AS `$session_id_2`, min(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) AS `$window_id_2` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'step one'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'step two'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, if(equals(e.event, 'step three'), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC')))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2, 3]), 0) + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -149,8 +185,8 @@ # --- # name: TestFunnelStrictStepsPersons.test_strict_funnel_person_recordings.2 ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT aggregation_target AS actor_id, @@ -261,14 +297,50 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT aggregation_target AS actor_id, step_1_matching_events AS matching_events + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(step_2_matching_event) AS step_2_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, step_2_conversion_time AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, latest_2 AS latest_2, uuid_2 AS uuid_2, `$session_id_2` AS `$session_id_2`, `$window_id_2` AS `$window_id_2`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, min(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1, min(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`, min(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) AS latest_2, min(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) AS uuid_2, min(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) AS `$session_id_2`, min(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) AS `$window_id_2` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'step one'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'step two'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, if(equals(e.event, 'step three'), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC')))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [2, 3]), 0) + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -297,8 +369,8 @@ # --- # name: TestFunnelStrictStepsPersons.test_strict_funnel_person_recordings.4 ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT aggregation_target AS actor_id, @@ -409,14 +481,50 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(equals(steps, 2), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT aggregation_target AS actor_id, step_1_matching_events AS matching_events + FROM + (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(step_2_matching_event) AS step_2_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, step_2_conversion_time AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, latest_2 AS latest_2, uuid_2 AS uuid_2, `$session_id_2` AS `$session_id_2`, `$window_id_2` AS `$window_id_2`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, min(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1, min(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`, min(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) AS latest_2, min(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) AS uuid_2, min(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) AS `$session_id_2`, min(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) AS `$window_id_2` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'step one'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'step two'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, if(equals(e.event, 'step three'), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC')))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(equals(steps, 2), 0) + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr index 8a658948dedc5..bf89083a6719f 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr @@ -1,8 +1,8 @@ # serializer version: 1 # name: TestFunnelTrendsPersons.test_funnel_trend_persons_returns_recordings ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT aggregation_target AS actor_id, @@ -139,14 +139,55 @@ GROUP BY aggregation_target, entrance_period_start) WHERE ifNull(greaterOrEquals(steps_completed, 2), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT aggregation_target AS actor_id, step_1_matching_events AS matching_events + FROM + (SELECT aggregation_target AS aggregation_target, toStartOfDay(timestamp) AS entrance_period_start, max(steps) AS steps_completed, groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(step_2_matching_event) AS step_2_matching_events, groupArray(10)(final_matching_event) AS final_matching_events + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, latest_2 AS latest_2, uuid_2 AS uuid_2, `$session_id_2` AS `$session_id_2`, `$window_id_2` AS `$window_id_2`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2, if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`, if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'step one'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'step two'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, if(equals(e.event, 'step three'), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-07 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0)) + WHERE ifNull(equals(entrance_period_start, toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), 0) + GROUP BY aggregation_target, entrance_period_start) + WHERE ifNull(greaterOrEquals(steps_completed, 2), 0) + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -175,8 +216,8 @@ # --- # name: TestFunnelTrendsPersons.test_funnel_trend_persons_with_drop_off ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT aggregation_target AS actor_id, @@ -313,14 +354,55 @@ GROUP BY aggregation_target, entrance_period_start) WHERE and(ifNull(greaterOrEquals(steps_completed, 1), 0), ifNull(less(steps_completed, 3), 0)) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events + FROM + (SELECT aggregation_target AS aggregation_target, toStartOfDay(timestamp) AS entrance_period_start, max(steps) AS steps_completed, groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(step_2_matching_event) AS step_2_matching_events, groupArray(10)(final_matching_event) AS final_matching_events + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, latest_2 AS latest_2, uuid_2 AS uuid_2, `$session_id_2` AS `$session_id_2`, `$window_id_2` AS `$window_id_2`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2, if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`, if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'step one'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'step two'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, if(equals(e.event, 'step three'), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-07 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0)) + WHERE ifNull(equals(entrance_period_start, toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), 0) + GROUP BY aggregation_target, entrance_period_start) + WHERE and(ifNull(greaterOrEquals(steps_completed, 1), 0), ifNull(less(steps_completed, 3), 0)) + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -349,8 +431,8 @@ # --- # name: TestFunnelTrendsPersons.test_funnel_trend_persons_with_no_to_step ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT aggregation_target AS actor_id, @@ -487,14 +569,55 @@ GROUP BY aggregation_target, entrance_period_start) WHERE ifNull(greaterOrEquals(steps_completed, 3), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events + FROM + (SELECT aggregation_target AS aggregation_target, toStartOfDay(timestamp) AS entrance_period_start, max(steps) AS steps_completed, groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(step_2_matching_event) AS step_2_matching_events, groupArray(10)(final_matching_event) AS final_matching_events + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, latest_2 AS latest_2, uuid_2 AS uuid_2, `$session_id_2` AS `$session_id_2`, `$window_id_2` AS `$window_id_2`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2, if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`, if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'step one'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'step two'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, if(equals(e.event, 'step three'), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-07 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0)) + WHERE ifNull(equals(entrance_period_start, toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), 0) + GROUP BY aggregation_target, entrance_period_start) + WHERE ifNull(greaterOrEquals(steps_completed, 3), 0) + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered_persons.ambr index 9a4ea2c43af05..ab710413373b7 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered_persons.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered_persons.ambr @@ -1,8 +1,8 @@ # serializer version: 1 # name: TestFunnelUnorderedStepsPersons.test_unordered_funnel_does_not_return_recordings ''' - SELECT persons.id, - persons.id AS id, + SELECT filterable_persons.id, + filterable_persons.id AS id, source.matching_events AS matching_events FROM (SELECT aggregation_target AS actor_id, @@ -257,14 +257,94 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT aggregation_target AS actor_id, array() AS matching_events + FROM + (SELECT aggregation_target AS aggregation_target, steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, step_2_conversion_time AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, latest_2 AS latest_2, uuid_2 AS uuid_2, `$session_id_2` AS `$session_id_2`, `$window_id_2` AS `$window_id_2`, arraySort([latest_0, latest_1, latest_2]) AS event_times, arraySum([if(and(ifNull(less(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 1, 0), if(and(ifNull(less(latest_0, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 1, 0), 1]) AS steps, arraySort([latest_0, latest_1, latest_2]) AS conversion_times, if(and(isNotNull(conversion_times[2]), ifNull(lessOrEquals(conversion_times[2], plus(toTimeZone(conversion_times[1], 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', conversion_times[1], conversion_times[2]), NULL) AS step_1_conversion_time, if(and(isNotNull(conversion_times[3]), ifNull(lessOrEquals(conversion_times[3], plus(toTimeZone(conversion_times[2], 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', conversion_times[2], conversion_times[3]), NULL) AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'step one'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'step two'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, if(equals(e.event, 'step three'), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0) + UNION ALL SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, latest_2 AS latest_2, uuid_2 AS uuid_2, `$session_id_2` AS `$session_id_2`, `$window_id_2` AS `$window_id_2`, arraySort([latest_0, latest_1, latest_2]) AS event_times, arraySum([if(and(ifNull(less(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 1, 0), if(and(ifNull(less(latest_0, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 1, 0), 1]) AS steps, arraySort([latest_0, latest_1, latest_2]) AS conversion_times, if(and(isNotNull(conversion_times[2]), ifNull(lessOrEquals(conversion_times[2], plus(toTimeZone(conversion_times[1], 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', conversion_times[1], conversion_times[2]), NULL) AS step_1_conversion_time, if(and(isNotNull(conversion_times[3]), ifNull(lessOrEquals(conversion_times[3], plus(toTimeZone(conversion_times[2], 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', conversion_times[2], conversion_times[3]), NULL) AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'step two'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'step three'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, if(equals(e.event, 'step one'), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0) + UNION ALL SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, step_2 AS step_2, latest_2 AS latest_2, uuid_2 AS uuid_2, `$session_id_2` AS `$session_id_2`, `$window_id_2` AS `$window_id_2`, arraySort([latest_0, latest_1, latest_2]) AS event_times, arraySum([if(and(ifNull(less(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 1, 0), if(and(ifNull(less(latest_0, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 1, 0), 1]) AS steps, arraySort([latest_0, latest_1, latest_2]) AS conversion_times, if(and(isNotNull(conversion_times[2]), ifNull(lessOrEquals(conversion_times[2], plus(toTimeZone(conversion_times[1], 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', conversion_times[1], conversion_times[2]), NULL) AS step_1_conversion_time, if(and(isNotNull(conversion_times[3]), ifNull(lessOrEquals(conversion_times[3], plus(toTimeZone(conversion_times[2], 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', conversion_times[2], conversion_times[3]), NULL) AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`, step_2 AS step_2, min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, e.uuid AS uuid, if(equals(e.event, 'step three'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'step one'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, if(equals(e.event, 'step two'), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2, 3]), 0) + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, diff --git a/posthog/hogql_queries/insights/test/__snapshots__/test_paths_query_runner_ee.ambr b/posthog/hogql_queries/insights/test/__snapshots__/test_paths_query_runner_ee.ambr index afa0d1f23304a..488692557e9bd 100644 --- a/posthog/hogql_queries/insights/test/__snapshots__/test_paths_query_runner_ee.ambr +++ b/posthog/hogql_queries/insights/test/__snapshots__/test_paths_query_runner_ee.ambr @@ -1177,7 +1177,7 @@ # --- # name: TestClickhousePaths.test_person_dropoffs ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -1277,14 +1277,44 @@ arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE and(ifNull(equals(path_dropoff_key, '2_step two'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) and isNull(path_key))) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 5) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 5) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 5) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 5) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 5) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(events.event, '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(path_dropoff_key, '2_step two'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) + and isNull(path_key))) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -1298,7 +1328,7 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.1 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -1397,14 +1427,43 @@ JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE ifNull(equals(path_key, '2_step two'), 0) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 5) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 5) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 5) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 5) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 5) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(events.event, '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE ifNull(equals(path_key, '2_step two'), 0) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -1418,7 +1477,7 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.2 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -1517,14 +1576,43 @@ JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE and(ifNull(equals(last_path_key, '2_step two'), 0), 1) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 5) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 5) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 5) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 5) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 5) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(events.event, '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '2_step two'), 0), 1) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -1538,7 +1626,7 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.3 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -1638,14 +1726,44 @@ arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE and(ifNull(equals(path_dropoff_key, '3_step three'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) and isNull(path_key))) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 5) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 5) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 5) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 5) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 5) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(events.event, '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(path_dropoff_key, '3_step three'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) + and isNull(path_key))) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -1659,7 +1777,7 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.4 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -1758,14 +1876,43 @@ JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE ifNull(equals(path_key, '3_step three'), 0) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 5) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 5) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 5) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 5) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 5) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(events.event, '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE ifNull(equals(path_key, '3_step three'), 0) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -1779,7 +1926,7 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.5 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -1878,14 +2025,43 @@ JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE and(ifNull(equals(last_path_key, '3_step three'), 0), 1) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 5) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 5) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 5) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 5) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 5) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(events.event, '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '3_step three'), 0), 1) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -1899,7 +2075,7 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.6 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -1999,14 +2175,44 @@ arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE and(ifNull(equals(path_dropoff_key, '4_step four'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) and isNull(path_key))) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 5) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 5) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 5) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 5) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 5) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(events.event, '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(path_dropoff_key, '4_step four'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) + and isNull(path_key))) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -2020,7 +2226,7 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.7 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -2119,14 +2325,43 @@ JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE ifNull(equals(path_key, '4_step four'), 0) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 5) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 5) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 5) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 5) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 5) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(events.event, '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE ifNull(equals(path_key, '4_step four'), 0) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -2140,7 +2375,7 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.8 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -2239,14 +2474,43 @@ JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE and(ifNull(equals(last_path_key, '4_step four'), 0), 1) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 5) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 5) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 5) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 5) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 5) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(events.event, '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '4_step four'), 0), 1) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -2352,8 +2616,8 @@ # --- # name: TestClickhousePaths.test_recording ''' - SELECT persons.id AS id, - persons.created_at AS created_at, + SELECT filterable_persons.id AS id, + filterable_persons.created_at AS created_at, source.event_count AS event_count, source.matching_events AS matching_events FROM @@ -2454,15 +2718,44 @@ JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE ifNull(equals(path_key, '2_/2'), 0) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at, person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 5) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 5) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 5) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 5) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 5) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE ifNull(equals(path_key, '2_/2'), 0) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -2491,8 +2784,8 @@ # --- # name: TestClickhousePaths.test_recording_for_dropoff ''' - SELECT persons.id AS id, - persons.created_at AS created_at, + SELECT filterable_persons.id AS id, + filterable_persons.created_at AS created_at, source.event_count AS event_count, source.matching_events AS matching_events FROM @@ -2594,15 +2887,45 @@ arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE and(ifNull(equals(path_dropoff_key, '2_/2'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) and isNull(path_key))) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at, person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 5) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 5) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 5) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 5) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 5) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(path_dropoff_key, '2_/2'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) + and isNull(path_key))) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -2616,8 +2939,8 @@ # --- # name: TestClickhousePaths.test_recording_for_dropoff.1 ''' - SELECT persons.id AS id, - persons.created_at AS created_at, + SELECT filterable_persons.id AS id, + filterable_persons.created_at AS created_at, source.event_count AS event_count, source.matching_events AS matching_events FROM @@ -2719,15 +3042,45 @@ arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE and(ifNull(equals(path_dropoff_key, '3_/3'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) and isNull(path_key))) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at, person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 5) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 5) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 5) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 5) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 5) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(path_dropoff_key, '3_/3'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) + and isNull(path_key))) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -2756,8 +3109,8 @@ # --- # name: TestClickhousePaths.test_recording_with_no_window_or_session_id ''' - SELECT persons.id AS id, - persons.created_at AS created_at, + SELECT filterable_persons.id AS id, + filterable_persons.created_at AS created_at, source.event_count AS event_count, source.matching_events AS matching_events FROM @@ -2858,15 +3211,44 @@ JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE ifNull(equals(path_key, '2_/2'), 0) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at, person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 5) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 5) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 5) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 5) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 5) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE ifNull(equals(path_key, '2_/2'), 0) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -2895,8 +3277,8 @@ # --- # name: TestClickhousePaths.test_recording_with_start_and_end ''' - SELECT persons.id AS id, - persons.created_at AS created_at, + SELECT filterable_persons.id AS id, + filterable_persons.created_at AS created_at, source.event_count AS event_count, source.matching_events AS matching_events FROM @@ -3006,15 +3388,45 @@ arrayEnumerate(limited_path_timings) AS event_in_session_index WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) WHERE ifNull(equals(path_key, '2_/2'), 0) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at, person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, '/3') AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, indexOf(compact_path, '/1') AS start_target_index, if(ifNull(greater(start_target_index, 0), 0), arraySlice(compact_path, start_target_index), compact_path) AS start_filtered_path, indexOf(start_filtered_path, '/3') AS end_target_index, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_path, end_target_index), start_filtered_path) AS filtered_path, if(ifNull(greater(length(filtered_path), 5), 0), arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_path) AS limited_path, if(ifNull(greater(start_target_index, 0), 0), arraySlice(timings, start_target_index), timings) AS start_filtered_timings, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) AS filtered_timings, if(ifNull(greater(length(filtered_timings), 5), 0), arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[plus(1, intDiv(5, 2))]], arraySlice(filtered_timings, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_timings) AS limited_timings, if(ifNull(greater(start_target_index, 0), 0), arraySlice(uuid, start_target_index), uuid) AS start_filtered_uuid, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_uuid, end_target_index), start_filtered_uuid) AS filtered_uuid, if(ifNull(greater(length(filtered_uuid), 5), 0), arrayConcat(arraySlice(filtered_uuid, 1, intDiv(5, 2)), [filtered_uuid[plus(1, intDiv(5, 2))]], arraySlice(filtered_uuid, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_uuid) AS limited_uuid, if(ifNull(greater(start_target_index, 0), 0), arraySlice(timestamp, start_target_index), timestamp) AS start_filtered_timestamp, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timestamp, end_target_index), start_filtered_timestamp) AS filtered_timestamp, if(ifNull(greater(length(filtered_timestamp), 5), 0), arrayConcat(arraySlice(filtered_timestamp, 1, intDiv(5, 2)), [filtered_timestamp[plus(1, intDiv(5, 2))]], arraySlice(filtered_timestamp, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_timestamp) AS limited_timestamp, if(ifNull(greater(start_target_index, 0), 0), arraySlice(session_id, start_target_index), session_id) AS start_filtered_session_id, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_session_id, end_target_index), start_filtered_session_id) AS filtered_session_id, if(ifNull(greater(length(filtered_session_id), 5), 0), arrayConcat(arraySlice(filtered_session_id, 1, intDiv(5, 2)), [filtered_session_id[plus(1, intDiv(5, 2))]], arraySlice(filtered_session_id, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_session_id) AS limited_session_id, if(ifNull(greater(start_target_index, 0), 0), arraySlice(window_id, start_target_index), window_id) AS start_filtered_window_id, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_window_id, end_target_index), start_filtered_window_id) AS filtered_window_id, if(ifNull(greater(length(filtered_window_id), 5), 0), arrayConcat(arraySlice(filtered_window_id, 1, intDiv(5, 2)), [filtered_window_id[plus(1, intDiv(5, 2))]], arraySlice(filtered_window_id, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_window_id) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) + WHERE ifNull(equals(path_key, '2_/2'), 0) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -3214,7 +3626,7 @@ # --- # name: TestClickhousePaths.test_start_and_end.1 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -3322,14 +3734,44 @@ arrayEnumerate(limited_path_timings) AS event_in_session_index WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) WHERE and(ifNull(equals(last_path_key, '1_/5'), 0), ifNull(equals(path_key, '2_/about'), 0)) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, '/about') AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, indexOf(compact_path, '/5') AS start_target_index, if(ifNull(greater(start_target_index, 0), 0), arraySlice(compact_path, start_target_index), compact_path) AS start_filtered_path, indexOf(start_filtered_path, '/about') AS end_target_index, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_path, end_target_index), start_filtered_path) AS filtered_path, if(ifNull(greater(length(filtered_path), 5), 0), arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_path) AS limited_path, if(ifNull(greater(start_target_index, 0), 0), arraySlice(timings, start_target_index), timings) AS start_filtered_timings, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) AS filtered_timings, if(ifNull(greater(length(filtered_timings), 5), 0), arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[plus(1, intDiv(5, 2))]], arraySlice(filtered_timings, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_timings) AS limited_timings, if(ifNull(greater(start_target_index, 0), 0), arraySlice(uuid, start_target_index), uuid) AS start_filtered_uuid, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_uuid, end_target_index), start_filtered_uuid) AS filtered_uuid, if(ifNull(greater(length(filtered_uuid), 5), 0), arrayConcat(arraySlice(filtered_uuid, 1, intDiv(5, 2)), [filtered_uuid[plus(1, intDiv(5, 2))]], arraySlice(filtered_uuid, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_uuid) AS limited_uuid, if(ifNull(greater(start_target_index, 0), 0), arraySlice(timestamp, start_target_index), timestamp) AS start_filtered_timestamp, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timestamp, end_target_index), start_filtered_timestamp) AS filtered_timestamp, if(ifNull(greater(length(filtered_timestamp), 5), 0), arrayConcat(arraySlice(filtered_timestamp, 1, intDiv(5, 2)), [filtered_timestamp[plus(1, intDiv(5, 2))]], arraySlice(filtered_timestamp, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_timestamp) AS limited_timestamp, if(ifNull(greater(start_target_index, 0), 0), arraySlice(session_id, start_target_index), session_id) AS start_filtered_session_id, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_session_id, end_target_index), start_filtered_session_id) AS filtered_session_id, if(ifNull(greater(length(filtered_session_id), 5), 0), arrayConcat(arraySlice(filtered_session_id, 1, intDiv(5, 2)), [filtered_session_id[plus(1, intDiv(5, 2))]], arraySlice(filtered_session_id, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_session_id) AS limited_session_id, if(ifNull(greater(start_target_index, 0), 0), arraySlice(window_id, start_target_index), window_id) AS start_filtered_window_id, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_window_id, end_target_index), start_filtered_window_id) AS filtered_window_id, if(ifNull(greater(length(filtered_window_id), 5), 0), arrayConcat(arraySlice(filtered_window_id, 1, intDiv(5, 2)), [filtered_window_id[plus(1, intDiv(5, 2))]], arraySlice(filtered_window_id, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_window_id) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) + WHERE and(ifNull(equals(last_path_key, '1_/5'), 0), ifNull(equals(path_key, '2_/about'), 0)) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -3431,7 +3873,7 @@ # --- # name: TestClickhousePaths.test_start_and_end.3 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -3539,14 +3981,44 @@ arrayEnumerate(limited_path_timings) AS event_in_session_index WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) WHERE and(ifNull(equals(last_path_key, '3_...'), 0), ifNull(equals(path_key, '4_/5'), 0)) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, '/about') AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, indexOf(compact_path, '/2') AS start_target_index, if(ifNull(greater(start_target_index, 0), 0), arraySlice(compact_path, start_target_index), compact_path) AS start_filtered_path, indexOf(start_filtered_path, '/about') AS end_target_index, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_path, end_target_index), start_filtered_path) AS filtered_path, if(ifNull(greater(length(filtered_path), 4), 0), arrayConcat(arraySlice(filtered_path, 1, intDiv(4, 2)), ['...'], arraySlice(filtered_path, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_path) AS limited_path, if(ifNull(greater(start_target_index, 0), 0), arraySlice(timings, start_target_index), timings) AS start_filtered_timings, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) AS filtered_timings, if(ifNull(greater(length(filtered_timings), 4), 0), arrayConcat(arraySlice(filtered_timings, 1, intDiv(4, 2)), [filtered_timings[plus(1, intDiv(4, 2))]], arraySlice(filtered_timings, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_timings) AS limited_timings, if(ifNull(greater(start_target_index, 0), 0), arraySlice(uuid, start_target_index), uuid) AS start_filtered_uuid, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_uuid, end_target_index), start_filtered_uuid) AS filtered_uuid, if(ifNull(greater(length(filtered_uuid), 4), 0), arrayConcat(arraySlice(filtered_uuid, 1, intDiv(4, 2)), [filtered_uuid[plus(1, intDiv(4, 2))]], arraySlice(filtered_uuid, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_uuid) AS limited_uuid, if(ifNull(greater(start_target_index, 0), 0), arraySlice(timestamp, start_target_index), timestamp) AS start_filtered_timestamp, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timestamp, end_target_index), start_filtered_timestamp) AS filtered_timestamp, if(ifNull(greater(length(filtered_timestamp), 4), 0), arrayConcat(arraySlice(filtered_timestamp, 1, intDiv(4, 2)), [filtered_timestamp[plus(1, intDiv(4, 2))]], arraySlice(filtered_timestamp, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_timestamp) AS limited_timestamp, if(ifNull(greater(start_target_index, 0), 0), arraySlice(session_id, start_target_index), session_id) AS start_filtered_session_id, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_session_id, end_target_index), start_filtered_session_id) AS filtered_session_id, if(ifNull(greater(length(filtered_session_id), 4), 0), arrayConcat(arraySlice(filtered_session_id, 1, intDiv(4, 2)), [filtered_session_id[plus(1, intDiv(4, 2))]], arraySlice(filtered_session_id, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_session_id) AS limited_session_id, if(ifNull(greater(start_target_index, 0), 0), arraySlice(window_id, start_target_index), window_id) AS start_filtered_window_id, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_window_id, end_target_index), start_filtered_window_id) AS filtered_window_id, if(ifNull(greater(length(filtered_window_id), 4), 0), arrayConcat(arraySlice(filtered_window_id, 1, intDiv(4, 2)), [filtered_window_id[plus(1, intDiv(4, 2))]], arraySlice(filtered_window_id, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_window_id) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) + WHERE and(ifNull(equals(last_path_key, '3_...'), 0), ifNull(equals(path_key, '4_/5'), 0)) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -3648,7 +4120,7 @@ # --- # name: TestClickhousePaths.test_start_and_end_materialized.1 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -3756,14 +4228,44 @@ arrayEnumerate(limited_path_timings) AS event_in_session_index WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) WHERE and(ifNull(equals(last_path_key, '1_/5'), 0), ifNull(equals(path_key, '2_/about'), 0)) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, '/about') AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, indexOf(compact_path, '/5') AS start_target_index, if(ifNull(greater(start_target_index, 0), 0), arraySlice(compact_path, start_target_index), compact_path) AS start_filtered_path, indexOf(start_filtered_path, '/about') AS end_target_index, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_path, end_target_index), start_filtered_path) AS filtered_path, if(ifNull(greater(length(filtered_path), 5), 0), arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_path) AS limited_path, if(ifNull(greater(start_target_index, 0), 0), arraySlice(timings, start_target_index), timings) AS start_filtered_timings, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) AS filtered_timings, if(ifNull(greater(length(filtered_timings), 5), 0), arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[plus(1, intDiv(5, 2))]], arraySlice(filtered_timings, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_timings) AS limited_timings, if(ifNull(greater(start_target_index, 0), 0), arraySlice(uuid, start_target_index), uuid) AS start_filtered_uuid, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_uuid, end_target_index), start_filtered_uuid) AS filtered_uuid, if(ifNull(greater(length(filtered_uuid), 5), 0), arrayConcat(arraySlice(filtered_uuid, 1, intDiv(5, 2)), [filtered_uuid[plus(1, intDiv(5, 2))]], arraySlice(filtered_uuid, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_uuid) AS limited_uuid, if(ifNull(greater(start_target_index, 0), 0), arraySlice(timestamp, start_target_index), timestamp) AS start_filtered_timestamp, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timestamp, end_target_index), start_filtered_timestamp) AS filtered_timestamp, if(ifNull(greater(length(filtered_timestamp), 5), 0), arrayConcat(arraySlice(filtered_timestamp, 1, intDiv(5, 2)), [filtered_timestamp[plus(1, intDiv(5, 2))]], arraySlice(filtered_timestamp, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_timestamp) AS limited_timestamp, if(ifNull(greater(start_target_index, 0), 0), arraySlice(session_id, start_target_index), session_id) AS start_filtered_session_id, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_session_id, end_target_index), start_filtered_session_id) AS filtered_session_id, if(ifNull(greater(length(filtered_session_id), 5), 0), arrayConcat(arraySlice(filtered_session_id, 1, intDiv(5, 2)), [filtered_session_id[plus(1, intDiv(5, 2))]], arraySlice(filtered_session_id, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_session_id) AS limited_session_id, if(ifNull(greater(start_target_index, 0), 0), arraySlice(window_id, start_target_index), window_id) AS start_filtered_window_id, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_window_id, end_target_index), start_filtered_window_id) AS filtered_window_id, if(ifNull(greater(length(filtered_window_id), 5), 0), arrayConcat(arraySlice(filtered_window_id, 1, intDiv(5, 2)), [filtered_window_id[plus(1, intDiv(5, 2))]], arraySlice(filtered_window_id, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_window_id) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(nullIf(nullIf(events.`mat_$current_url`, ''), 'null'), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) + WHERE and(ifNull(equals(last_path_key, '1_/5'), 0), ifNull(equals(path_key, '2_/about'), 0)) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -3865,7 +4367,7 @@ # --- # name: TestClickhousePaths.test_start_and_end_materialized.3 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -3973,14 +4475,44 @@ arrayEnumerate(limited_path_timings) AS event_in_session_index WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) WHERE and(ifNull(equals(last_path_key, '3_...'), 0), ifNull(equals(path_key, '4_/5'), 0)) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, '/about') AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, indexOf(compact_path, '/2') AS start_target_index, if(ifNull(greater(start_target_index, 0), 0), arraySlice(compact_path, start_target_index), compact_path) AS start_filtered_path, indexOf(start_filtered_path, '/about') AS end_target_index, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_path, end_target_index), start_filtered_path) AS filtered_path, if(ifNull(greater(length(filtered_path), 4), 0), arrayConcat(arraySlice(filtered_path, 1, intDiv(4, 2)), ['...'], arraySlice(filtered_path, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_path) AS limited_path, if(ifNull(greater(start_target_index, 0), 0), arraySlice(timings, start_target_index), timings) AS start_filtered_timings, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) AS filtered_timings, if(ifNull(greater(length(filtered_timings), 4), 0), arrayConcat(arraySlice(filtered_timings, 1, intDiv(4, 2)), [filtered_timings[plus(1, intDiv(4, 2))]], arraySlice(filtered_timings, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_timings) AS limited_timings, if(ifNull(greater(start_target_index, 0), 0), arraySlice(uuid, start_target_index), uuid) AS start_filtered_uuid, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_uuid, end_target_index), start_filtered_uuid) AS filtered_uuid, if(ifNull(greater(length(filtered_uuid), 4), 0), arrayConcat(arraySlice(filtered_uuid, 1, intDiv(4, 2)), [filtered_uuid[plus(1, intDiv(4, 2))]], arraySlice(filtered_uuid, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_uuid) AS limited_uuid, if(ifNull(greater(start_target_index, 0), 0), arraySlice(timestamp, start_target_index), timestamp) AS start_filtered_timestamp, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timestamp, end_target_index), start_filtered_timestamp) AS filtered_timestamp, if(ifNull(greater(length(filtered_timestamp), 4), 0), arrayConcat(arraySlice(filtered_timestamp, 1, intDiv(4, 2)), [filtered_timestamp[plus(1, intDiv(4, 2))]], arraySlice(filtered_timestamp, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_timestamp) AS limited_timestamp, if(ifNull(greater(start_target_index, 0), 0), arraySlice(session_id, start_target_index), session_id) AS start_filtered_session_id, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_session_id, end_target_index), start_filtered_session_id) AS filtered_session_id, if(ifNull(greater(length(filtered_session_id), 4), 0), arrayConcat(arraySlice(filtered_session_id, 1, intDiv(4, 2)), [filtered_session_id[plus(1, intDiv(4, 2))]], arraySlice(filtered_session_id, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_session_id) AS limited_session_id, if(ifNull(greater(start_target_index, 0), 0), arraySlice(window_id, start_target_index), window_id) AS start_filtered_window_id, if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_window_id, end_target_index), start_filtered_window_id) AS filtered_window_id, if(ifNull(greater(length(filtered_window_id), 4), 0), arrayConcat(arraySlice(filtered_window_id, 1, intDiv(4, 2)), [filtered_window_id[plus(1, intDiv(4, 2))]], arraySlice(filtered_window_id, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_window_id) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(nullIf(nullIf(events.`mat_$current_url`, ''), 'null'), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) + WHERE and(ifNull(equals(last_path_key, '3_...'), 0), ifNull(equals(path_key, '4_/5'), 0)) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -4244,7 +4776,7 @@ # --- # name: TestClickhousePaths.test_step_limit.1 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -4343,14 +4875,43 @@ JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE and(ifNull(equals(last_path_key, '1_/1'), 0), ifNull(equals(path_key, '2_/2'), 0)) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 2) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 2) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 2) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 2) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 2) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 2) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '1_/1'), 0), ifNull(equals(path_key, '2_/2'), 0)) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -4364,7 +4925,7 @@ # --- # name: TestClickhousePaths.test_step_limit.2 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -4463,14 +5024,43 @@ JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE and(ifNull(equals(last_path_key, '2_/2'), 0), ifNull(equals(path_key, '3_/3'), 0)) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 2) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 2) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 2) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 2) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 2) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 2) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '2_/2'), 0), ifNull(equals(path_key, '3_/3'), 0)) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -4567,7 +5157,7 @@ # --- # name: TestClickhousePaths.test_step_limit.4 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -4666,14 +5256,43 @@ JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE and(ifNull(equals(last_path_key, '2_/2'), 0), ifNull(equals(path_key, '3_/3'), 0)) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 3) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 3) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 3) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 3) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 3) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 3) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '2_/2'), 0), ifNull(equals(path_key, '3_/3'), 0)) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -4770,7 +5389,7 @@ # --- # name: TestClickhousePaths.test_step_limit.6 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -4869,14 +5488,43 @@ JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE and(ifNull(equals(last_path_key, '1_/1'), 0), ifNull(equals(path_key, '2_/2'), 0)) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 4) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 4) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 4) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 4) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 4) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 4) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '1_/1'), 0), ifNull(equals(path_key, '2_/2'), 0)) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -4890,7 +5538,7 @@ # --- # name: TestClickhousePaths.test_step_limit.7 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -4989,14 +5637,43 @@ JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE and(ifNull(equals(last_path_key, '2_/2'), 0), ifNull(equals(path_key, '3_/3'), 0)) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 4) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 4) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 4) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 4) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 4) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 4) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '2_/2'), 0), ifNull(equals(path_key, '3_/3'), 0)) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60, @@ -5010,7 +5687,7 @@ # --- # name: TestClickhousePaths.test_step_limit.8 ''' - SELECT persons.id AS id + SELECT filterable_persons.id AS id FROM (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, @@ -5109,14 +5786,43 @@ JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) WHERE and(ifNull(equals(last_path_key, '3_/3'), 0), ifNull(equals(path_key, '4_/4'), 0)) - GROUP BY person_id) AS source + GROUP BY person_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(id, + (SELECT source.actor_id AS actor_id + FROM + (SELECT person_id AS actor_id, groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, count(*) AS event_count + FROM + (SELECT person_id AS person_id, path AS path, conversion_time AS conversion_time, event_in_session_index AS event_in_session_index, concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, path_dropoff_key AS path_dropoff_key, final_uuid AS uuid, final_timestamp AS timestamp, final_session_id AS session_id, final_window_id AS window_id + FROM + (SELECT person_id AS person_id, joined_path_tuple.1 AS path, joined_path_tuple.2 AS conversion_time, joined_path_tuple.3 AS prev_path, event_in_session_index, session_index AS session_index, arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, arrayFilter((x, y) -> y, time, mapping) AS timings, arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, indexOf(compact_path, NULL) AS target_index, joined_path_tuple.4 AS final_uuid, joined_path_tuple.5 AS final_timestamp, joined_path_tuple.6 AS final_session_id, joined_path_tuple.7 AS final_window_id, arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, arraySlice(filtered_path, 1, 4) AS limited_path, if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, arraySlice(filtered_timings, 1, 4) AS limited_timings, if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, arraySlice(filtered_uuid, 1, 4) AS limited_uuid, if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, arraySlice(filtered_timestamp, 1, 4) AS limited_timestamp, if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, arraySlice(filtered_session_id, 1, 4) AS limited_session_id, if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, arraySlice(filtered_window_id, 1, 4) AS limited_window_id, arrayDifference(limited_timings) AS timings_diff, concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, path_time_tuple.1 AS path_basic, path_time_tuple.2 AS time, session_index, arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, path_time_tuple.4 AS uuid_items, path_time_tuple.5 AS timestamp_items, path_time_tuple.6 AS session_id_items, path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, groupArray(timestamp) AS timing_list, groupArray(path_item) AS path_list, groupArray(uuid) AS uuid_list, groupArray(timestamp) AS timestamp_list, groupArray(session_id) AS session_id_list, groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, events__pdi.person_id AS person_id, ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, events.uuid AS uuid, toTimeZone(events.timestamp, 'UTC') AS timestamp, ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, NULL AS groupings, multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '3_/3'), 0), ifNull(equals(path_key, '4_/4'), 0)) + GROUP BY person_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) + ORDER BY filterable_persons.id ASC LIMIT 101 OFFSET 0 SETTINGS readonly=2, max_execution_time=60,