diff --git a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation.ambr b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation.ambr index f5656c8ed7ad5..067665c0cf2a0 100644 --- a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation.ambr +++ b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation.ambr @@ -9,7 +9,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -17,14 +17,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -75,9 +75,10 @@ toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, 2 AS target_step, ['paid', 'user signed up'] as funnel_step_names - SELECT event.event AS name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count + SELECT event.event AS name, -- If we have a `person.steps = target_step`, we know the person + -- reached the end of the funnel + countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, -- And the converse being for failures + countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count FROM events AS event JOIN (SELECT distinct_id, @@ -85,20 +86,37 @@ FROM person_distinct_id2 WHERE team_id = 2 GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON pdi.distinct_id = events.distinct_id - JOIN funnel_actors AS actors ON pdi.person_id = actors.actor_id - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event NOT IN [] - GROUP BY name + HAVING argMax(is_deleted, version) = 0) AS pdi ON pdi.distinct_id = events.distinct_id -- NOTE: I would love to right join here, so we count get total + -- success/failure numbers in one pass, but this causes out of memory + -- error mentioning issues with right filling. I'm sure there's a way + -- to do it but lifes too short. + + JOIN funnel_actors AS actors ON pdi.person_id = actors.actor_id -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names + AND event.event NOT IN [] + GROUP BY name -- To get the total success/failure numbers, we do an aggregation on + -- the funnel people CTE and count distinct actor_ids + UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count + SELECT -- We're not using WITH TOTALS because the resulting queries are + -- not runnable in Metabase + 'Total_Values_In_Query' as name, + countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, + countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count FROM funnel_actors AS actors ''' # --- @@ -112,7 +130,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -120,14 +138,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -173,24 +191,47 @@ ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000), 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count + SELECT concat(prop.1, '::', prop.2) as name, -- We generate a unique identifier for each property value as: PropertyName::Value + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count FROM (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['$browser'], [replaceRegexpAll(JSONExtractRaw(person_props, '$browser'), '^"|"$', '')])) as prop - FROM funnel_actors - JOIN - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 2 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = funnel_actors.actor_id) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] + funnel_actors.steps as steps, /* + We can extract multiple property values at the same time, since we're + already querying the person table. + This gives us something like: + -------------------- + person1, steps, [property_value_0, property_value_1, property_value_2] + person2, steps, [property_value_0, property_value_1, property_value_2] + + To group by property name, we need to extract the property from the array. ArrayJoin helps us do that. + It transforms the above into: + + -------------------- + + person1, steps, property_value_0 + person1, steps, property_value_1 + person1, steps, property_value_2 + + person2, steps, property_value_0 + person2, steps, property_value_1 + person2, steps, property_value_2 + + To avoid clashes and clarify the values, we also zip with the property name, to generate + tuples like: (property_name, property_value), which we then group by + */ arrayJoin(arrayZip(['$browser'], [replaceRegexpAll(JSONExtractRaw(person_props, '$browser'), '^"|"$', '')])) as prop + FROM funnel_actors + JOIN + (SELECT id, + argMax(properties, version) as person_props + FROM person + WHERE team_id = 2 + GROUP BY id + HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = funnel_actors.actor_id) aggregation_target_with_props -- Group by the tuple items: (property_name, property_value) generated by zip + + GROUP BY prop.1, + prop.2 + HAVING prop.1 NOT IN [] UNION ALL SELECT 'Total_Values_In_Query' as name, countDistinctIf(actor_id, steps = target_step) AS success_count, @@ -576,7 +617,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -584,14 +625,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -637,24 +678,47 @@ ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000), 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count + SELECT concat(prop.1, '::', prop.2) as name, -- We generate a unique identifier for each property value as: PropertyName::Value + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count FROM (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['$browser'], ["pmat_$browser"])) as prop - FROM funnel_actors - JOIN - (SELECT id, - argMax(pmat_$browser, version) as pmat_$browser - FROM person - WHERE team_id = 2 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = funnel_actors.actor_id) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] + funnel_actors.steps as steps, /* + We can extract multiple property values at the same time, since we're + already querying the person table. + This gives us something like: + -------------------- + person1, steps, [property_value_0, property_value_1, property_value_2] + person2, steps, [property_value_0, property_value_1, property_value_2] + + To group by property name, we need to extract the property from the array. ArrayJoin helps us do that. + It transforms the above into: + + -------------------- + + person1, steps, property_value_0 + person1, steps, property_value_1 + person1, steps, property_value_2 + + person2, steps, property_value_0 + person2, steps, property_value_1 + person2, steps, property_value_2 + + To avoid clashes and clarify the values, we also zip with the property name, to generate + tuples like: (property_name, property_value), which we then group by + */ arrayJoin(arrayZip(['$browser'], ["pmat_$browser"])) as prop + FROM funnel_actors + JOIN + (SELECT id, + argMax(pmat_$browser, version) as pmat_$browser + FROM person + WHERE team_id = 2 + GROUP BY id + HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = funnel_actors.actor_id) aggregation_target_with_props -- Group by the tuple items: (property_name, property_value) generated by zip + + GROUP BY prop.1, + prop.2 + HAVING prop.1 NOT IN [] UNION ALL SELECT 'Total_Values_In_Query' as name, countDistinctIf(actor_id, steps = target_step) AS success_count, @@ -1040,7 +1104,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1048,14 +1112,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -1095,21 +1159,34 @@ FROM (SELECT actors.actor_id as actor_id, actors.steps as steps, - events.event as event_name, - arrayJoin(JSONExtractKeysAndValues(properties, 'String')) as prop + events.event as event_name, -- Same as what we do in $all property queries + arrayJoin(JSONExtractKeysAndValues(properties, 'String')) as prop FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_1 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event IN ['positively_related', 'negatively_related'] ) - GROUP BY name - HAVING (success_count + failure_count) > 2 - AND prop.1 NOT IN [] - UNION ALL + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_1 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names + AND event.event IN ['positively_related', 'negatively_related'] ) + GROUP BY name -- Discard high cardinality / low hits properties + -- This removes the long tail of random properties with empty, null, or very small values + + HAVING (success_count + failure_count) > 2 + AND prop.1 NOT IN [] + UNION ALL -- To get the total success/failure numbers, we do an aggregation on + -- the funnel people CTE and count distinct actor_ids + SELECT 'Total_Values_In_Query' as name, countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count @@ -1126,7 +1203,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1134,14 +1211,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -1181,21 +1258,34 @@ FROM (SELECT actors.actor_id as actor_id, actors.steps as steps, - events.event as event_name, - arrayJoin(JSONExtractKeysAndValues(properties, 'String')) as prop + events.event as event_name, -- Same as what we do in $all property queries + arrayJoin(JSONExtractKeysAndValues(properties, 'String')) as prop FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_1 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event IN ['positively_related', 'negatively_related'] ) - GROUP BY name - HAVING (success_count + failure_count) > 2 - AND prop.1 NOT IN [] - UNION ALL + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_1 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names + AND event.event IN ['positively_related', 'negatively_related'] ) + GROUP BY name -- Discard high cardinality / low hits properties + -- This removes the long tail of random properties with empty, null, or very small values + + HAVING (success_count + failure_count) > 2 + AND prop.1 NOT IN [] + UNION ALL -- To get the total success/failure numbers, we do an aggregation on + -- the funnel people CTE and count distinct actor_ids + SELECT 'Total_Values_In_Query' as name, countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count @@ -1212,7 +1302,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1220,14 +1310,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -1261,23 +1351,37 @@ toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, 2 AS target_step, ['paid', 'user signed up'] as funnel_step_names - SELECT event.event AS name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count + SELECT event.event AS name, -- If we have a `person.steps = target_step`, we know the person + -- reached the end of the funnel + countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, -- And the converse being for failures + countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event NOT IN [] - GROUP BY name + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names + AND event.event NOT IN [] + GROUP BY name -- To get the total success/failure numbers, we do an aggregation on + -- the funnel people CTE and count distinct actor_ids + UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count + SELECT -- We're not using WITH TOTALS because the resulting queries are + -- not runnable in Metabase + 'Total_Values_In_Query' as name, + countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, + countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count FROM funnel_actors AS actors ''' # --- @@ -1291,7 +1395,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1299,14 +1403,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -1342,13 +1446,22 @@ ['user signed up', 'paid'] as funnel_step_names SELECT actors.actor_id AS actor_id FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names AND event.event = 'positively_related' AND actors.steps = target_step GROUP BY actor_id @@ -1367,7 +1480,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1375,14 +1488,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -1418,13 +1531,22 @@ ['user signed up', 'paid'] as funnel_step_names SELECT actors.actor_id AS actor_id FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names AND event.event = 'positively_related' AND actors.steps <> target_step GROUP BY actor_id @@ -1443,7 +1565,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1451,14 +1573,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -1494,13 +1616,22 @@ ['user signed up', 'paid'] as funnel_step_names SELECT actors.actor_id AS actor_id FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names AND event.event = 'negatively_related' AND actors.steps = target_step GROUP BY actor_id @@ -1519,7 +1650,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1527,14 +1658,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -1570,13 +1701,22 @@ ['user signed up', 'paid'] as funnel_step_names SELECT actors.actor_id AS actor_id FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names AND event.event = 'negatively_related' AND actors.steps <> target_step GROUP BY actor_id @@ -1595,7 +1735,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1603,14 +1743,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -1652,23 +1792,37 @@ toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, 2 AS target_step, ['paid', 'user signed up'] as funnel_step_names - SELECT event.event AS name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count + SELECT event.event AS name, -- If we have a `person.steps = target_step`, we know the person + -- reached the end of the funnel + countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, -- And the converse being for failures + countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event NOT IN [] - GROUP BY name + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names + AND event.event NOT IN [] + GROUP BY name -- To get the total success/failure numbers, we do an aggregation on + -- the funnel people CTE and count distinct actor_ids + UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count + SELECT -- We're not using WITH TOTALS because the resulting queries are + -- not runnable in Metabase + 'Total_Values_In_Query' as name, + countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, + countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count FROM funnel_actors AS actors ''' # --- @@ -1682,7 +1836,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1690,14 +1844,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -1741,13 +1895,22 @@ ['user signed up', 'paid'] as funnel_step_names SELECT actors.actor_id AS actor_id FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names AND event.event = 'negatively_related' AND actors.steps = target_step GROUP BY actor_id @@ -1766,7 +1929,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1774,14 +1937,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -1825,13 +1988,22 @@ ['user signed up', 'paid'] as funnel_step_names SELECT actors.actor_id AS actor_id FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names AND event.event = 'negatively_related' AND actors.steps <> target_step GROUP BY actor_id @@ -1850,7 +2022,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1858,14 +2030,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -1907,23 +2079,37 @@ toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, 2 AS target_step, ['paid', 'user signed up'] as funnel_step_names - SELECT event.event AS name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count + SELECT event.event AS name, -- If we have a `person.steps = target_step`, we know the person + -- reached the end of the funnel + countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, -- And the converse being for failures + countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event NOT IN [] - GROUP BY name + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names + AND event.event NOT IN [] + GROUP BY name -- To get the total success/failure numbers, we do an aggregation on + -- the funnel people CTE and count distinct actor_ids + UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count + SELECT -- We're not using WITH TOTALS because the resulting queries are + -- not runnable in Metabase + 'Total_Values_In_Query' as name, + countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, + countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count FROM funnel_actors AS actors ''' # --- @@ -1937,7 +2123,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1945,14 +2131,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -1996,13 +2182,22 @@ ['user signed up', 'paid'] as funnel_step_names SELECT actors.actor_id AS actor_id FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names AND event.event = 'positively_related' AND actors.steps = target_step GROUP BY actor_id @@ -2021,7 +2216,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -2029,14 +2224,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -2080,13 +2275,22 @@ ['user signed up', 'paid'] as funnel_step_names SELECT actors.actor_id AS actor_id FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names AND event.event = 'positively_related' AND actors.steps <> target_step GROUP BY actor_id @@ -2105,7 +2309,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -2113,14 +2317,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -2164,13 +2368,22 @@ ['user signed up', 'paid'] as funnel_step_names SELECT actors.actor_id AS actor_id FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names AND event.event = 'negatively_related' AND actors.steps = target_step GROUP BY actor_id @@ -2189,7 +2402,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -2197,14 +2410,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -2248,13 +2461,22 @@ ['user signed up', 'paid'] as funnel_step_names SELECT actors.actor_id AS actor_id FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names AND event.event = 'negatively_related' AND actors.steps <> target_step GROUP BY actor_id @@ -2273,7 +2495,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -2281,14 +2503,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -2338,23 +2560,37 @@ toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, 2 AS target_step, ['paid', 'user signed up'] as funnel_step_names - SELECT event.event AS name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count + SELECT event.event AS name, -- If we have a `person.steps = target_step`, we know the person + -- reached the end of the funnel + countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, -- And the converse being for failures + countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event NOT IN [] - GROUP BY name + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names + AND event.event NOT IN [] + GROUP BY name -- To get the total success/failure numbers, we do an aggregation on + -- the funnel people CTE and count distinct actor_ids + UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count + SELECT -- We're not using WITH TOTALS because the resulting queries are + -- not runnable in Metabase + 'Total_Values_In_Query' as name, + countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, + countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count FROM funnel_actors AS actors ''' # --- @@ -2368,7 +2604,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -2376,14 +2612,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -2435,13 +2671,22 @@ ['user signed up', 'paid'] as funnel_step_names SELECT actors.actor_id AS actor_id FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names AND event.event = 'negatively_related' AND actors.steps = target_step GROUP BY actor_id @@ -2460,7 +2705,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -2468,14 +2713,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -2527,13 +2772,22 @@ ['user signed up', 'paid'] as funnel_step_names SELECT actors.actor_id AS actor_id FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names + JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names AND event.event = 'negatively_related' AND actors.steps <> target_step GROUP BY actor_id @@ -2552,7 +2806,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -2560,14 +2814,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -2598,24 +2852,47 @@ ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000), 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count + SELECT concat(prop.1, '::', prop.2) as name, -- We generate a unique identifier for each property value as: PropertyName::Value + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count FROM (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] + funnel_actors.steps as steps, /* + We can extract multiple property values at the same time, since we're + already querying the person table. + This gives us something like: + -------------------- + person1, steps, [property_value_0, property_value_1, property_value_2] + person2, steps, [property_value_0, property_value_1, property_value_2] + + To group by property name, we need to extract the property from the array. ArrayJoin helps us do that. + It transforms the above into: + + -------------------- + + person1, steps, property_value_0 + person1, steps, property_value_1 + person1, steps, property_value_2 + + person2, steps, property_value_0 + person2, steps, property_value_1 + person2, steps, property_value_2 + + To avoid clashes and clarify the values, we also zip with the property name, to generate + tuples like: (property_name, property_value), which we then group by + */ arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop + FROM funnel_actors + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props -- Group by the tuple items: (property_name, property_value) generated by zip + + GROUP BY prop.1, + prop.2 + HAVING prop.1 NOT IN [] UNION ALL SELECT 'Total_Values_In_Query' as name, countDistinctIf(actor_id, steps = target_step) AS success_count, @@ -2925,7 +3202,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -2933,14 +3210,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -2971,24 +3248,47 @@ ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000), 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count + SELECT concat(prop.1, '::', prop.2) as name, -- We generate a unique identifier for each property value as: PropertyName::Value + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count FROM (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] + funnel_actors.steps as steps, /* + We can extract multiple property values at the same time, since we're + already querying the person table. + This gives us something like: + -------------------- + person1, steps, [property_value_0, property_value_1, property_value_2] + person2, steps, [property_value_0, property_value_1, property_value_2] + + To group by property name, we need to extract the property from the array. ArrayJoin helps us do that. + It transforms the above into: + + -------------------- + + person1, steps, property_value_0 + person1, steps, property_value_1 + person1, steps, property_value_2 + + person2, steps, property_value_0 + person2, steps, property_value_1 + person2, steps, property_value_2 + + To avoid clashes and clarify the values, we also zip with the property name, to generate + tuples like: (property_name, property_value), which we then group by + */ arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop + FROM funnel_actors + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props -- Group by the tuple items: (property_name, property_value) generated by zip + + GROUP BY prop.1, + prop.2 + HAVING prop.1 NOT IN [] UNION ALL SELECT 'Total_Values_In_Query' as name, countDistinctIf(actor_id, steps = target_step) AS success_count, @@ -3006,7 +3306,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -3014,14 +3314,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -3052,24 +3352,47 @@ ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000), 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count + SELECT concat(prop.1, '::', prop.2) as name, -- We generate a unique identifier for each property value as: PropertyName::Value + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count FROM (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] + funnel_actors.steps as steps, /* + We can extract multiple property values at the same time, since we're + already querying the person table. + This gives us something like: + -------------------- + person1, steps, [property_value_0, property_value_1, property_value_2] + person2, steps, [property_value_0, property_value_1, property_value_2] + + To group by property name, we need to extract the property from the array. ArrayJoin helps us do that. + It transforms the above into: + + -------------------- + + person1, steps, property_value_0 + person1, steps, property_value_1 + person1, steps, property_value_2 + + person2, steps, property_value_0 + person2, steps, property_value_1 + person2, steps, property_value_2 + + To avoid clashes and clarify the values, we also zip with the property name, to generate + tuples like: (property_name, property_value), which we then group by + */ arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop + FROM funnel_actors + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props -- Group by the tuple items: (property_name, property_value) generated by zip + + GROUP BY prop.1, + prop.2 + HAVING prop.1 NOT IN [] UNION ALL SELECT 'Total_Values_In_Query' as name, countDistinctIf(actor_id, steps = target_step) AS success_count, @@ -3379,7 +3702,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -3387,14 +3710,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -3425,24 +3748,47 @@ ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000), 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count + SELECT concat(prop.1, '::', prop.2) as name, -- We generate a unique identifier for each property value as: PropertyName::Value + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count FROM (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] + funnel_actors.steps as steps, /* + We can extract multiple property values at the same time, since we're + already querying the person table. + This gives us something like: + -------------------- + person1, steps, [property_value_0, property_value_1, property_value_2] + person2, steps, [property_value_0, property_value_1, property_value_2] + + To group by property name, we need to extract the property from the array. ArrayJoin helps us do that. + It transforms the above into: + + -------------------- + + person1, steps, property_value_0 + person1, steps, property_value_1 + person1, steps, property_value_2 + + person2, steps, property_value_0 + person2, steps, property_value_1 + person2, steps, property_value_2 + + To avoid clashes and clarify the values, we also zip with the property name, to generate + tuples like: (property_name, property_value), which we then group by + */ arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop + FROM funnel_actors + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props -- Group by the tuple items: (property_name, property_value) generated by zip + + GROUP BY prop.1, + prop.2 + HAVING prop.1 NOT IN [] UNION ALL SELECT 'Total_Values_In_Query' as name, countDistinctIf(actor_id, steps = target_step) AS success_count, @@ -3460,7 +3806,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -3468,14 +3814,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -3508,24 +3854,47 @@ ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000), 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count + SELECT concat(prop.1, '::', prop.2) as name, -- We generate a unique identifier for each property value as: PropertyName::Value + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count FROM (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] + funnel_actors.steps as steps, /* + We can extract multiple property values at the same time, since we're + already querying the person table. + This gives us something like: + -------------------- + person1, steps, [property_value_0, property_value_1, property_value_2] + person2, steps, [property_value_0, property_value_1, property_value_2] + + To group by property name, we need to extract the property from the array. ArrayJoin helps us do that. + It transforms the above into: + + -------------------- + + person1, steps, property_value_0 + person1, steps, property_value_1 + person1, steps, property_value_2 + + person2, steps, property_value_0 + person2, steps, property_value_1 + person2, steps, property_value_2 + + To avoid clashes and clarify the values, we also zip with the property name, to generate + tuples like: (property_name, property_value), which we then group by + */ arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop + FROM funnel_actors + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props -- Group by the tuple items: (property_name, property_value) generated by zip + + GROUP BY prop.1, + prop.2 + HAVING prop.1 NOT IN [] UNION ALL SELECT 'Total_Values_In_Query' as name, countDistinctIf(actor_id, steps = target_step) AS success_count, @@ -3843,7 +4212,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -3851,14 +4220,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -3891,24 +4260,47 @@ ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000), 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count + SELECT concat(prop.1, '::', prop.2) as name, -- We generate a unique identifier for each property value as: PropertyName::Value + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count FROM (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] + funnel_actors.steps as steps, /* + We can extract multiple property values at the same time, since we're + already querying the person table. + This gives us something like: + -------------------- + person1, steps, [property_value_0, property_value_1, property_value_2] + person2, steps, [property_value_0, property_value_1, property_value_2] + + To group by property name, we need to extract the property from the array. ArrayJoin helps us do that. + It transforms the above into: + + -------------------- + + person1, steps, property_value_0 + person1, steps, property_value_1 + person1, steps, property_value_2 + + person2, steps, property_value_0 + person2, steps, property_value_1 + person2, steps, property_value_2 + + To avoid clashes and clarify the values, we also zip with the property name, to generate + tuples like: (property_name, property_value), which we then group by + */ arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop + FROM funnel_actors + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props -- Group by the tuple items: (property_name, property_value) generated by zip + + GROUP BY prop.1, + prop.2 + HAVING prop.1 NOT IN [] UNION ALL SELECT 'Total_Values_In_Query' as name, countDistinctIf(actor_id, steps = target_step) AS success_count, @@ -3926,7 +4318,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -3934,14 +4326,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -3974,24 +4366,47 @@ ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000), 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count + SELECT concat(prop.1, '::', prop.2) as name, -- We generate a unique identifier for each property value as: PropertyName::Value + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count FROM (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] + funnel_actors.steps as steps, /* + We can extract multiple property values at the same time, since we're + already querying the person table. + This gives us something like: + -------------------- + person1, steps, [property_value_0, property_value_1, property_value_2] + person2, steps, [property_value_0, property_value_1, property_value_2] + + To group by property name, we need to extract the property from the array. ArrayJoin helps us do that. + It transforms the above into: + + -------------------- + + person1, steps, property_value_0 + person1, steps, property_value_1 + person1, steps, property_value_2 + + person2, steps, property_value_0 + person2, steps, property_value_1 + person2, steps, property_value_2 + + To avoid clashes and clarify the values, we also zip with the property name, to generate + tuples like: (property_name, property_value), which we then group by + */ arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop + FROM funnel_actors + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props -- Group by the tuple items: (property_name, property_value) generated by zip + + GROUP BY prop.1, + prop.2 + HAVING prop.1 NOT IN [] UNION ALL SELECT 'Total_Values_In_Query' as name, countDistinctIf(actor_id, steps = target_step) AS success_count, @@ -4309,7 +4724,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -4317,14 +4732,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -4357,24 +4772,47 @@ ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000), 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count + SELECT concat(prop.1, '::', prop.2) as name, -- We generate a unique identifier for each property value as: PropertyName::Value + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count FROM (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] + funnel_actors.steps as steps, /* + We can extract multiple property values at the same time, since we're + already querying the person table. + This gives us something like: + -------------------- + person1, steps, [property_value_0, property_value_1, property_value_2] + person2, steps, [property_value_0, property_value_1, property_value_2] + + To group by property name, we need to extract the property from the array. ArrayJoin helps us do that. + It transforms the above into: + + -------------------- + + person1, steps, property_value_0 + person1, steps, property_value_1 + person1, steps, property_value_2 + + person2, steps, property_value_0 + person2, steps, property_value_1 + person2, steps, property_value_2 + + To avoid clashes and clarify the values, we also zip with the property name, to generate + tuples like: (property_name, property_value), which we then group by + */ arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop + FROM funnel_actors + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props -- Group by the tuple items: (property_name, property_value) generated by zip + + GROUP BY prop.1, + prop.2 + HAVING prop.1 NOT IN [] UNION ALL SELECT 'Total_Values_In_Query' as name, countDistinctIf(actor_id, steps = target_step) AS success_count, @@ -4392,7 +4830,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -4400,14 +4838,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -4446,24 +4884,47 @@ ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000), 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count + SELECT concat(prop.1, '::', prop.2) as name, -- We generate a unique identifier for each property value as: PropertyName::Value + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count FROM (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] + funnel_actors.steps as steps, /* + We can extract multiple property values at the same time, since we're + already querying the person table. + This gives us something like: + -------------------- + person1, steps, [property_value_0, property_value_1, property_value_2] + person2, steps, [property_value_0, property_value_1, property_value_2] + + To group by property name, we need to extract the property from the array. ArrayJoin helps us do that. + It transforms the above into: + + -------------------- + + person1, steps, property_value_0 + person1, steps, property_value_1 + person1, steps, property_value_2 + + person2, steps, property_value_0 + person2, steps, property_value_1 + person2, steps, property_value_2 + + To avoid clashes and clarify the values, we also zip with the property name, to generate + tuples like: (property_name, property_value), which we then group by + */ arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop + FROM funnel_actors + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props -- Group by the tuple items: (property_name, property_value) generated by zip + + GROUP BY prop.1, + prop.2 + HAVING prop.1 NOT IN [] UNION ALL SELECT 'Total_Values_In_Query' as name, countDistinctIf(actor_id, steps = target_step) AS success_count, @@ -4805,7 +5266,7 @@ (SELECT aggregation_target, steps, avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , + median(step_1_conversion_time) step_1_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -4813,14 +5274,14 @@ (SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , + step_1_conversion_time, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time FROM @@ -4859,24 +5320,47 @@ ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000), 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count + SELECT concat(prop.1, '::', prop.2) as name, -- We generate a unique identifier for each property value as: PropertyName::Value + countDistinctIf(actor_id, steps = target_step) AS success_count, + countDistinctIf(actor_id, steps <> target_step) AS failure_count FROM (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 2 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] + funnel_actors.steps as steps, /* + We can extract multiple property values at the same time, since we're + already querying the person table. + This gives us something like: + -------------------- + person1, steps, [property_value_0, property_value_1, property_value_2] + person2, steps, [property_value_0, property_value_1, property_value_2] + + To group by property name, we need to extract the property from the array. ArrayJoin helps us do that. + It transforms the above into: + + -------------------- + + person1, steps, property_value_0 + person1, steps, property_value_1 + person1, steps, property_value_2 + + person2, steps, property_value_0 + person2, steps, property_value_1 + person2, steps, property_value_2 + + To avoid clashes and clarify the values, we also zip with the property name, to generate + tuples like: (property_name, property_value), which we then group by + */ arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop + FROM funnel_actors + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props -- Group by the tuple items: (property_name, property_value) generated by zip + + GROUP BY prop.1, + prop.2 + HAVING prop.1 NOT IN [] UNION ALL SELECT 'Total_Values_In_Query' as name, countDistinctIf(actor_id, steps = target_step) AS success_count, diff --git a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr index b616e1ac7e349..2005925389e23 100644 --- a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr +++ b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr @@ -13,7 +13,7 @@ median(step_1_conversion_time) step_1_median_conversion_time_inner, groupArray(10)(step_0_matching_event) as step_0_matching_events, groupArray(10)(step_1_matching_event) as step_1_matching_events, - groupArray(10)(final_matching_event) as final_matching_events , + groupArray(10)(final_matching_event) as final_matching_events, argMax(latest_0, steps) as timestamp, argMax(latest_1, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -30,14 +30,14 @@ "uuid_1", "$session_id_1", "$window_id_1") as step_1_matching_event, - if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event , + if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event, latest_0, latest_1, latest_0 FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, ("latest_0", @@ -111,7 +111,7 @@ toDateTime('2021-01-01 00:00:00', 'UTC') AS date_from, 2 AS target_step, ['$pageview', 'insight analyzed'] as funnel_step_names - SELECT actors.actor_id AS actor_id , + SELECT actors.actor_id AS actor_id, any(actors.matching_events) AS matching_events FROM events AS event JOIN @@ -120,14 +120,27 @@ FROM person_distinct_id2 WHERE team_id = 2 GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON pdi.distinct_id = events.distinct_id - JOIN funnel_actors AS actors ON pdi.person_id = actors.actor_id - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names + HAVING argMax(is_deleted, version) = 0) AS pdi ON pdi.distinct_id = events.distinct_id -- NOTE: I would love to right join here, so we count get total + -- success/failure numbers in one pass, but this causes out of memory + -- error mentioning issues with right filling. I'm sure there's a way + -- to do it but lifes too short. + + JOIN funnel_actors AS actors ON pdi.person_id = actors.actor_id -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names AND event.event = 'insight loaded' AND actors.steps = target_step GROUP BY actor_id @@ -164,7 +177,7 @@ groupArray(10)(step_0_matching_event) as step_0_matching_events, groupArray(10)(step_1_matching_event) as step_1_matching_events, groupArray(10)(step_2_matching_event) as step_2_matching_events, - groupArray(10)(final_matching_event) as final_matching_events , + groupArray(10)(final_matching_event) as final_matching_events, argMax(latest_0, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -186,7 +199,7 @@ "uuid_2", "$session_id_2", "$window_id_2") as step_2_matching_event, - if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) as final_matching_event , + if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) as final_matching_event, latest_0, latest_2, latest_0 @@ -196,7 +209,7 @@ AND latest_1 <= latest_0 + INTERVAL 14 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 14 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -327,7 +340,7 @@ toDateTime('2021-01-01 00:00:00', 'UTC') AS date_from, 3 AS target_step, ['$pageview', 'insight analyzed', 'insight updated'] as funnel_step_names - SELECT actors.actor_id AS actor_id , + SELECT actors.actor_id AS actor_id, any(actors.matching_events) AS matching_events FROM events AS event JOIN @@ -336,14 +349,27 @@ FROM person_distinct_id2 WHERE team_id = 2 GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON pdi.distinct_id = events.distinct_id - JOIN funnel_actors AS actors ON pdi.person_id = actors.actor_id - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 2 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names + HAVING argMax(is_deleted, version) = 0) AS pdi ON pdi.distinct_id = events.distinct_id -- NOTE: I would love to right join here, so we count get total + -- success/failure numbers in one pass, but this causes out of memory + -- error mentioning issues with right filling. I'm sure there's a way + -- to do it but lifes too short. + + JOIN funnel_actors AS actors ON pdi.person_id = actors.actor_id -- Make sure we're only looking at events before the final step, or + -- failing that, date_to + + WHERE -- add this condition in to ensure we can filter events before + -- joining funnel_actors + toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 2 -- Add in per actor filtering on event time range. We just want + -- to include events that happened within the bounds of the + -- actors time in the funnel. + + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) -- Ensure that the event is not outside the bounds of the funnel conversion window + -- Exclude funnel steps + + AND event.event NOT IN funnel_step_names AND event.event = 'insight loaded' AND actors.steps <> target_step GROUP BY actor_id diff --git a/ee/clickhouse/queries/test/__snapshots__/test_lifecycle.ambr b/ee/clickhouse/queries/test/__snapshots__/test_lifecycle.ambr index 4a1fa4a377eba..67abfff2fa9b3 100644 --- a/ee/clickhouse/queries/test/__snapshots__/test_lifecycle.ambr +++ b/ee/clickhouse/queries/test/__snapshots__/test_lifecycle.ambr @@ -1,8 +1,10 @@ # serializer version: 1 # name: TestClickhouseLifecycle.test_interval_dates_days ''' - WITH 'day' AS selected_period, - periods AS + WITH 'day' AS selected_period, -- enumerate all requested periods, so we can zero fill as needed. + -- NOTE: we use dateSub interval rather than seconds, which means we can handle, + -- for instance, month intervals which do not have a fixed number of seconds. + periods AS (SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2021-05-05 23:59:59', 'UTC'))) AS start_of_period FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC') + INTERVAL 1 day)))) SELECT groupArray(start_of_period) AS date, @@ -16,7 +18,8 @@ (SELECT periods.start_of_period as start_of_period, toUInt16(0) AS counts, status - FROM periods + FROM periods -- Zero fill for each status + CROSS JOIN (SELECT status FROM @@ -74,8 +77,10 @@ # --- # name: TestClickhouseLifecycle.test_interval_dates_months ''' - WITH 'month' AS selected_period, - periods AS + WITH 'month' AS selected_period, -- enumerate all requested periods, so we can zero fill as needed. + -- NOTE: we use dateSub interval rather than seconds, which means we can handle, + -- for instance, month intervals which do not have a fixed number of seconds. + periods AS (SELECT dateSub(month, number, dateTrunc(selected_period, toDateTime('2021-05-05 23:59:59', 'UTC'))) AS start_of_period FROM numbers(dateDiff('month', dateTrunc('month', toDateTime('2021-02-04 00:00:00', 'UTC')), dateTrunc('month', toDateTime('2021-05-05 23:59:59', 'UTC') + INTERVAL 1 month)))) SELECT groupArray(start_of_period) AS date, @@ -89,7 +94,8 @@ (SELECT periods.start_of_period as start_of_period, toUInt16(0) AS counts, status - FROM periods + FROM periods -- Zero fill for each status + CROSS JOIN (SELECT status FROM @@ -147,8 +153,10 @@ # --- # name: TestClickhouseLifecycle.test_interval_dates_weeks ''' - WITH 'week' AS selected_period, - periods AS + WITH 'week' AS selected_period, -- enumerate all requested periods, so we can zero fill as needed. + -- NOTE: we use dateSub interval rather than seconds, which means we can handle, + -- for instance, month intervals which do not have a fixed number of seconds. + periods AS (SELECT dateSub(week, number, dateTrunc(selected_period, toDateTime('2021-05-06 23:59:59', 'UTC'))) AS start_of_period FROM numbers(dateDiff('week', dateTrunc('week', toDateTime('2021-04-06 00:00:00', 'UTC')), dateTrunc('week', toDateTime('2021-05-06 23:59:59', 'UTC') + INTERVAL 1 week)))) SELECT groupArray(start_of_period) AS date, @@ -162,7 +170,8 @@ (SELECT periods.start_of_period as start_of_period, toUInt16(0) AS counts, status - FROM periods + FROM periods -- Zero fill for each status + CROSS JOIN (SELECT status FROM @@ -220,8 +229,10 @@ # --- # name: TestClickhouseLifecycle.test_lifecycle_edge_cases ''' - WITH 'day' AS selected_period, - periods AS + WITH 'day' AS selected_period, -- enumerate all requested periods, so we can zero fill as needed. + -- NOTE: we use dateSub interval rather than seconds, which means we can handle, + -- for instance, month intervals which do not have a fixed number of seconds. + periods AS (SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2020-01-18 23:59:59', 'UTC'))) AS start_of_period FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2020-01-11 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2020-01-18 23:59:59', 'UTC') + INTERVAL 1 day)))) SELECT groupArray(start_of_period) AS date, @@ -235,7 +246,8 @@ (SELECT periods.start_of_period as start_of_period, toUInt16(0) AS counts, status - FROM periods + FROM periods -- Zero fill for each status + CROSS JOIN (SELECT status FROM @@ -293,8 +305,10 @@ # --- # name: TestClickhouseLifecycle.test_lifecycle_hogql_event_properties ''' - WITH 'day' AS selected_period, - periods AS + WITH 'day' AS selected_period, -- enumerate all requested periods, so we can zero fill as needed. + -- NOTE: we use dateSub interval rather than seconds, which means we can handle, + -- for instance, month intervals which do not have a fixed number of seconds. + periods AS (SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2021-05-05 23:59:59', 'UTC'))) AS start_of_period FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC') + INTERVAL 1 day)))) SELECT groupArray(start_of_period) AS date, @@ -308,7 +322,8 @@ (SELECT periods.start_of_period as start_of_period, toUInt16(0) AS counts, status - FROM periods + FROM periods -- Zero fill for each status + CROSS JOIN (SELECT status FROM @@ -367,8 +382,10 @@ # --- # name: TestClickhouseLifecycle.test_lifecycle_hogql_event_properties_materialized ''' - WITH 'day' AS selected_period, - periods AS + WITH 'day' AS selected_period, -- enumerate all requested periods, so we can zero fill as needed. + -- NOTE: we use dateSub interval rather than seconds, which means we can handle, + -- for instance, month intervals which do not have a fixed number of seconds. + periods AS (SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2021-05-05 23:59:59', 'UTC'))) AS start_of_period FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC') + INTERVAL 1 day)))) SELECT groupArray(start_of_period) AS date, @@ -382,7 +399,8 @@ (SELECT periods.start_of_period as start_of_period, toUInt16(0) AS counts, status - FROM periods + FROM periods -- Zero fill for each status + CROSS JOIN (SELECT status FROM @@ -441,8 +459,10 @@ # --- # name: TestClickhouseLifecycle.test_lifecycle_hogql_person_properties ''' - WITH 'day' AS selected_period, - periods AS + WITH 'day' AS selected_period, -- enumerate all requested periods, so we can zero fill as needed. + -- NOTE: we use dateSub interval rather than seconds, which means we can handle, + -- for instance, month intervals which do not have a fixed number of seconds. + periods AS (SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2021-05-05 23:59:59', 'UTC'))) AS start_of_period FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC') + INTERVAL 1 day)))) SELECT groupArray(start_of_period) AS date, @@ -456,7 +476,8 @@ (SELECT periods.start_of_period as start_of_period, toUInt16(0) AS counts, status - FROM periods + FROM periods -- Zero fill for each status + CROSS JOIN (SELECT status FROM @@ -516,8 +537,10 @@ # --- # name: TestClickhouseLifecycle.test_lifecycle_hogql_person_properties_materialized ''' - WITH 'day' AS selected_period, - periods AS + WITH 'day' AS selected_period, -- enumerate all requested periods, so we can zero fill as needed. + -- NOTE: we use dateSub interval rather than seconds, which means we can handle, + -- for instance, month intervals which do not have a fixed number of seconds. + periods AS (SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2021-05-05 23:59:59', 'UTC'))) AS start_of_period FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC') + INTERVAL 1 day)))) SELECT groupArray(start_of_period) AS date, @@ -531,7 +554,8 @@ (SELECT periods.start_of_period as start_of_period, toUInt16(0) AS counts, status - FROM periods + FROM periods -- Zero fill for each status + CROSS JOIN (SELECT status FROM @@ -591,8 +615,10 @@ # --- # name: TestClickhouseLifecycle.test_test_account_filters_with_groups ''' - WITH 'day' AS selected_period, - periods AS + WITH 'day' AS selected_period, -- enumerate all requested periods, so we can zero fill as needed. + -- NOTE: we use dateSub interval rather than seconds, which means we can handle, + -- for instance, month intervals which do not have a fixed number of seconds. + periods AS (SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2020-01-19 23:59:59', 'UTC'))) AS start_of_period FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2020-01-12 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2020-01-19 23:59:59', 'UTC') + INTERVAL 1 day)))) SELECT groupArray(start_of_period) AS date, @@ -606,7 +632,8 @@ (SELECT periods.start_of_period as start_of_period, toUInt16(0) AS counts, status - FROM periods + FROM periods -- Zero fill for each status + CROSS JOIN (SELECT status FROM diff --git a/ee/clickhouse/queries/test/__snapshots__/test_paths.ambr b/ee/clickhouse/queries/test/__snapshots__/test_paths.ambr index 958e129bc967d..8145617c8749b 100644 --- a/ee/clickhouse/queries/test/__snapshots__/test_paths.ambr +++ b/ee/clickhouse/queries/test/__snapshots__/test_paths.ambr @@ -9,7 +9,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -18,7 +18,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_0, latest_2, latest_0 @@ -28,7 +28,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -109,30 +109,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -166,7 +166,7 @@ AND e.timestamp >= target_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -190,7 +190,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -199,7 +199,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_0, latest_2, latest_0 @@ -209,7 +209,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -287,30 +287,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -344,7 +344,7 @@ AND e.timestamp >= target_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -366,7 +366,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -375,7 +375,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_0, latest_2, latest_0 @@ -385,7 +385,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -463,30 +463,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -520,7 +520,7 @@ AND e.timestamp >= target_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -542,7 +542,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -551,7 +551,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_0, latest_2, latest_0 @@ -561,7 +561,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -639,30 +639,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -696,7 +696,7 @@ AND e.timestamp >= target_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -718,7 +718,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -727,7 +727,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_0, latest_2, latest_0 @@ -737,7 +737,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -815,30 +815,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -872,7 +872,7 @@ AND e.timestamp >= target_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -894,7 +894,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -903,7 +903,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_0, latest_2, latest_0 @@ -913,7 +913,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -994,30 +994,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -1059,7 +1059,7 @@ AND e.timestamp >= target_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -1083,7 +1083,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1092,7 +1092,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_0, latest_2, latest_0 @@ -1102,7 +1102,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -1180,30 +1180,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -1245,7 +1245,7 @@ AND e.timestamp >= target_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -1267,7 +1267,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1276,7 +1276,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_0, latest_2, latest_0 @@ -1286,7 +1286,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -1364,30 +1364,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -1429,7 +1429,7 @@ AND e.timestamp >= target_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -1451,7 +1451,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1460,7 +1460,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_0, latest_2, latest_0 @@ -1470,7 +1470,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -1548,30 +1548,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -1613,7 +1613,7 @@ AND e.timestamp >= target_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -1635,7 +1635,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_0, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1644,7 +1644,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_0, latest_2, latest_0 @@ -1654,7 +1654,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -1732,30 +1732,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -1797,7 +1797,7 @@ AND e.timestamp >= target_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -1819,7 +1819,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_1, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -1828,7 +1828,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_1, latest_2, latest_0 @@ -1838,7 +1838,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -1919,30 +1919,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -1976,7 +1976,7 @@ AND e.timestamp >= target_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -2000,7 +2000,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_1, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -2009,7 +2009,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_1, latest_2, latest_0 @@ -2019,7 +2019,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -2100,30 +2100,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -2157,7 +2157,7 @@ AND e.timestamp >= target_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -2181,7 +2181,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_1, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -2190,7 +2190,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_1, latest_2, latest_0 @@ -2200,7 +2200,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -2281,30 +2281,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -2338,7 +2338,7 @@ AND e.timestamp <= target_timestamp + INTERVAL 7 DAY ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -2362,7 +2362,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_1, steps) as timestamp, argMax(latest_2, steps) as final_timestamp, argMax(latest_0, steps) as first_timestamp @@ -2371,7 +2371,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_1, latest_2, latest_0 @@ -2381,7 +2381,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -2462,30 +2462,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -2519,7 +2519,7 @@ AND e.timestamp <= target_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -2545,7 +2545,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_1, steps) as max_timestamp, argMax(latest_0, steps) as min_timestamp FROM @@ -2553,7 +2553,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_1, latest_0 FROM @@ -2562,7 +2562,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -2643,30 +2643,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -2702,7 +2702,7 @@ AND e.timestamp <= max_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -2728,7 +2728,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_1, steps) as max_timestamp, argMax(latest_0, steps) as min_timestamp FROM @@ -2736,7 +2736,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_1, latest_0 FROM @@ -2745,7 +2745,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -2823,30 +2823,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -2882,7 +2882,7 @@ AND e.timestamp <= max_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -2906,7 +2906,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_1, steps) as max_timestamp, argMax(latest_0, steps) as min_timestamp FROM @@ -2914,7 +2914,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_1, latest_0 FROM @@ -2923,7 +2923,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -3001,30 +3001,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -3060,7 +3060,7 @@ AND e.timestamp <= max_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -3084,7 +3084,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_1, steps) as max_timestamp, argMax(latest_0, steps) as min_timestamp FROM @@ -3092,7 +3092,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_1, latest_0 FROM @@ -3101,7 +3101,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -3179,30 +3179,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -3238,7 +3238,7 @@ AND e.timestamp <= max_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -3262,7 +3262,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_1, steps) as max_timestamp, argMax(latest_0, steps) as min_timestamp FROM @@ -3270,7 +3270,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_1, latest_0 FROM @@ -3279,7 +3279,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -3357,30 +3357,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -3416,7 +3416,7 @@ AND e.timestamp <= max_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -3440,7 +3440,7 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , + median(step_2_conversion_time) step_2_median_conversion_time_inner, argMax(latest_1, steps) as max_timestamp, argMax(latest_0, steps) as min_timestamp FROM @@ -3448,7 +3448,7 @@ steps, max(steps) over (PARTITION BY aggregation_target) as max_steps, step_1_conversion_time, - step_2_conversion_time , + step_2_conversion_time, latest_1, latest_0 FROM @@ -3457,7 +3457,7 @@ AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, if(isNotNull(latest_1) AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) @@ -3535,30 +3535,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -3594,7 +3594,7 @@ AND e.timestamp <= max_timestamp ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -3608,7 +3608,6 @@ # --- # name: TestClickhousePaths.test_end ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -3622,30 +3621,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, '/about') as target_index , - if(target_index > 0, arrayResize(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arrayResize(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, (-1) * 5) as limited_path , - arraySlice(filtered_timings, (-1) * 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, '/about') as target_index, + if(target_index > 0, arrayResize(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arrayResize(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, (-1) * 5) as limited_path, + arraySlice(filtered_timings, (-1) * 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -3676,7 +3675,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -3693,7 +3692,6 @@ # --- # name: TestClickhousePaths.test_end.1 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -3707,30 +3705,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, '/about') as target_index , - if(target_index > 0, arrayResize(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arrayResize(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, (-1) * 5) as limited_path , - arraySlice(filtered_timings, (-1) * 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, '/about') as target_index, + if(target_index > 0, arrayResize(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arrayResize(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, (-1) * 5) as limited_path, + arraySlice(filtered_timings, (-1) * 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -3761,7 +3759,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -3778,7 +3776,6 @@ # --- # name: TestClickhousePaths.test_end_materialized ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -3792,30 +3789,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, '/about') as target_index , - if(target_index > 0, arrayResize(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arrayResize(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, (-1) * 5) as limited_path , - arraySlice(filtered_timings, (-1) * 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, '/about') as target_index, + if(target_index > 0, arrayResize(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arrayResize(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, (-1) * 5) as limited_path, + arraySlice(filtered_timings, (-1) * 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -3846,7 +3843,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -3863,7 +3860,6 @@ # --- # name: TestClickhousePaths.test_end_materialized.1 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -3877,30 +3873,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, '/about') as target_index , - if(target_index > 0, arrayResize(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arrayResize(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, (-1) * 5) as limited_path , - arraySlice(filtered_timings, (-1) * 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, '/about') as target_index, + if(target_index > 0, arrayResize(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arrayResize(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, (-1) * 5) as limited_path, + arraySlice(filtered_timings, (-1) * 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -3931,7 +3927,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -3948,7 +3944,6 @@ # --- # name: TestClickhousePaths.test_event_exclusion_filters_with_wildcard_groups ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -3962,30 +3957,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -4019,7 +4014,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2023-05-23 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -4035,7 +4030,6 @@ # --- # name: TestClickhousePaths.test_event_exclusion_filters_with_wildcard_groups.1 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -4049,30 +4043,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -4106,7 +4100,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2023-05-23 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -4122,7 +4116,6 @@ # --- # name: TestClickhousePaths.test_event_inclusion_exclusion_filters ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -4136,30 +4129,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -4192,7 +4185,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2023-05-23 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -4208,7 +4201,6 @@ # --- # name: TestClickhousePaths.test_event_inclusion_exclusion_filters.1 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -4222,30 +4214,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -4278,7 +4270,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2023-05-23 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -4294,7 +4286,6 @@ # --- # name: TestClickhousePaths.test_event_inclusion_exclusion_filters.2 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -4308,30 +4299,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -4364,7 +4355,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2023-05-23 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -4380,7 +4371,6 @@ # --- # name: TestClickhousePaths.test_event_inclusion_exclusion_filters.3 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -4394,30 +4384,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -4450,7 +4440,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2023-05-23 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -4466,7 +4456,6 @@ # --- # name: TestClickhousePaths.test_event_inclusion_exclusion_filters.4 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -4480,30 +4469,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -4536,7 +4525,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2023-05-23 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -4552,7 +4541,6 @@ # --- # name: TestClickhousePaths.test_event_inclusion_exclusion_filters.5 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -4566,30 +4554,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -4626,7 +4614,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2023-05-23 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -4642,7 +4630,6 @@ # --- # name: TestClickhousePaths.test_event_inclusion_exclusion_filters.6 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -4656,30 +4643,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -4717,7 +4704,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2023-05-23 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -4733,7 +4720,6 @@ # --- # name: TestClickhousePaths.test_event_ordering ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -4747,30 +4733,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -4803,7 +4789,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-03 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -4819,7 +4805,6 @@ # --- # name: TestClickhousePaths.test_groups_filtering_person_on_events ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -4833,30 +4818,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -4886,7 +4871,7 @@ AND notEmpty(e.person_id) ORDER BY e.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -4902,7 +4887,6 @@ # --- # name: TestClickhousePaths.test_groups_filtering_person_on_events.1 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -4916,30 +4900,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -4969,7 +4953,7 @@ AND notEmpty(e.person_id) ORDER BY e.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -4985,7 +4969,6 @@ # --- # name: TestClickhousePaths.test_groups_filtering_person_on_events.2 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -4999,30 +4982,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -5052,7 +5035,7 @@ AND notEmpty(e.person_id) ORDER BY e.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -5068,7 +5051,6 @@ # --- # name: TestClickhousePaths.test_path_cleaning_rules_with_wildcard_groups ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -5082,30 +5064,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, '/step1') as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, '/step1') as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -5139,7 +5121,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -5156,7 +5138,6 @@ # --- # name: TestClickhousePaths.test_person_dropoffs ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -5167,30 +5148,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -5223,7 +5204,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -5237,7 +5218,6 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.1 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -5248,30 +5228,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -5304,7 +5284,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -5317,7 +5297,6 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.2 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -5328,30 +5307,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -5384,7 +5363,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -5397,7 +5376,6 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.3 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -5408,30 +5386,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -5464,7 +5442,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -5478,7 +5456,6 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.4 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -5489,30 +5466,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -5545,7 +5522,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -5558,7 +5535,6 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.5 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -5569,30 +5545,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -5625,7 +5601,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -5638,7 +5614,6 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.6 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -5649,30 +5624,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -5705,7 +5680,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -5719,7 +5694,6 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.7 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -5730,30 +5704,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -5786,7 +5760,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -5799,7 +5773,6 @@ # --- # name: TestClickhousePaths.test_person_dropoffs.8 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -5810,30 +5783,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -5866,7 +5839,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -5893,7 +5866,6 @@ # --- # name: TestClickhousePaths.test_person_on_events_v2.1 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -5907,30 +5879,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -5958,7 +5930,7 @@ AND notEmpty(e.person_id) ORDER BY if(notEmpty(overrides.person_id), overrides.person_id, e.person_id), e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -5974,8 +5946,7 @@ # --- # name: TestClickhousePaths.test_recording ''' - - SELECT person_id AS actor_id , + SELECT person_id AS actor_id, groupUniqArray(100)((timestamp, uuid, "$session_id", "$window_id")) as matching_events @@ -5992,50 +5963,50 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, joined_path_tuple.4 as final_uuid, joined_path_tuple.5 as final_timestamp, joined_path_tuple.6 as final_$session_id, - joined_path_tuple.7 as final_$window_id , + joined_path_tuple.7 as final_$window_id, arrayFilter((x, y)->y, uuid, mapping) as uuids, arrayFilter((x, y)->y, timestamp, mapping) as timestamps, arrayFilter((x, y)->y, $session_id, mapping) as $session_ids, - arrayFilter((x, y)->y, $window_id, mapping) as $window_ids , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - if(target_index > 0, arraySlice(uuids, target_index), uuids) as filtered_uuids , - arraySlice(filtered_uuids, 1, 5) as limited_uuids , - if(target_index > 0, arraySlice(timestamps, target_index), timestamps) as filtered_timestamps , - arraySlice(filtered_timestamps, 1, 5) as limited_timestamps , - if(target_index > 0, arraySlice($session_ids, target_index), $session_ids) as filtered_$session_ids , - arraySlice(filtered_$session_ids, 1, 5) as limited_$session_ids , - if(target_index > 0, arraySlice($window_ids, target_index), $window_ids) as filtered_$window_ids , - arraySlice(filtered_$window_ids, 1, 5) as limited_$window_ids , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuids, limited_timestamps, limited_$session_ids, limited_$window_ids) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + arrayFilter((x, y)->y, $window_id, mapping) as $window_ids, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + if(target_index > 0, arraySlice(uuids, target_index), uuids) as filtered_uuids, + arraySlice(filtered_uuids, 1, 5) as limited_uuids, + if(target_index > 0, arraySlice(timestamps, target_index), timestamps) as filtered_timestamps, + arraySlice(filtered_timestamps, 1, 5) as limited_timestamps, + if(target_index > 0, arraySlice($session_ids, target_index), $session_ids) as filtered_$session_ids, + arraySlice(filtered_$session_ids, 1, 5) as limited_$session_ids, + if(target_index > 0, arraySlice($window_ids, target_index), $window_ids) as filtered_$window_ids, + arraySlice(filtered_$window_ids, 1, 5) as limited_$window_ids, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuids, limited_timestamps, limited_$session_ids, limited_$window_ids) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, path_time_tuple.4 as uuid, path_time_tuple.5 as timestamp, path_time_tuple.6 as $session_id, - path_time_tuple.7 as $window_id , - session_index , - arrayZip(paths, timing, arrayDifference(timing), uuids, timestamps, $session_ids, $window_ids) as paths_tuple , + path_time_tuple.7 as $window_id, + session_index, + arrayZip(paths, timing, arrayDifference(timing), uuids, timestamps, $session_ids, $window_ids) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -6076,7 +6047,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-02 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -6101,8 +6072,7 @@ # --- # name: TestClickhousePaths.test_recording_for_dropoff ''' - - SELECT person_id AS actor_id , + SELECT person_id AS actor_id, groupUniqArray(100)((timestamp, uuid, "$session_id", "$window_id")) as matching_events @@ -6119,50 +6089,50 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, joined_path_tuple.4 as final_uuid, joined_path_tuple.5 as final_timestamp, joined_path_tuple.6 as final_$session_id, - joined_path_tuple.7 as final_$window_id , + joined_path_tuple.7 as final_$window_id, arrayFilter((x, y)->y, uuid, mapping) as uuids, arrayFilter((x, y)->y, timestamp, mapping) as timestamps, arrayFilter((x, y)->y, $session_id, mapping) as $session_ids, - arrayFilter((x, y)->y, $window_id, mapping) as $window_ids , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - if(target_index > 0, arraySlice(uuids, target_index), uuids) as filtered_uuids , - arraySlice(filtered_uuids, 1, 5) as limited_uuids , - if(target_index > 0, arraySlice(timestamps, target_index), timestamps) as filtered_timestamps , - arraySlice(filtered_timestamps, 1, 5) as limited_timestamps , - if(target_index > 0, arraySlice($session_ids, target_index), $session_ids) as filtered_$session_ids , - arraySlice(filtered_$session_ids, 1, 5) as limited_$session_ids , - if(target_index > 0, arraySlice($window_ids, target_index), $window_ids) as filtered_$window_ids , - arraySlice(filtered_$window_ids, 1, 5) as limited_$window_ids , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuids, limited_timestamps, limited_$session_ids, limited_$window_ids) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + arrayFilter((x, y)->y, $window_id, mapping) as $window_ids, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + if(target_index > 0, arraySlice(uuids, target_index), uuids) as filtered_uuids, + arraySlice(filtered_uuids, 1, 5) as limited_uuids, + if(target_index > 0, arraySlice(timestamps, target_index), timestamps) as filtered_timestamps, + arraySlice(filtered_timestamps, 1, 5) as limited_timestamps, + if(target_index > 0, arraySlice($session_ids, target_index), $session_ids) as filtered_$session_ids, + arraySlice(filtered_$session_ids, 1, 5) as limited_$session_ids, + if(target_index > 0, arraySlice($window_ids, target_index), $window_ids) as filtered_$window_ids, + arraySlice(filtered_$window_ids, 1, 5) as limited_$window_ids, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuids, limited_timestamps, limited_$session_ids, limited_$window_ids) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, path_time_tuple.4 as uuid, path_time_tuple.5 as timestamp, path_time_tuple.6 as $session_id, - path_time_tuple.7 as $window_id , - session_index , - arrayZip(paths, timing, arrayDifference(timing), uuids, timestamps, $session_ids, $window_ids) as paths_tuple , + path_time_tuple.7 as $window_id, + session_index, + arrayZip(paths, timing, arrayDifference(timing), uuids, timestamps, $session_ids, $window_ids) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -6203,7 +6173,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-02 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -6229,8 +6199,7 @@ # --- # name: TestClickhousePaths.test_recording_for_dropoff.2 ''' - - SELECT person_id AS actor_id , + SELECT person_id AS actor_id, groupUniqArray(100)((timestamp, uuid, "$session_id", "$window_id")) as matching_events @@ -6247,50 +6216,50 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, joined_path_tuple.4 as final_uuid, joined_path_tuple.5 as final_timestamp, joined_path_tuple.6 as final_$session_id, - joined_path_tuple.7 as final_$window_id , + joined_path_tuple.7 as final_$window_id, arrayFilter((x, y)->y, uuid, mapping) as uuids, arrayFilter((x, y)->y, timestamp, mapping) as timestamps, arrayFilter((x, y)->y, $session_id, mapping) as $session_ids, - arrayFilter((x, y)->y, $window_id, mapping) as $window_ids , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - if(target_index > 0, arraySlice(uuids, target_index), uuids) as filtered_uuids , - arraySlice(filtered_uuids, 1, 5) as limited_uuids , - if(target_index > 0, arraySlice(timestamps, target_index), timestamps) as filtered_timestamps , - arraySlice(filtered_timestamps, 1, 5) as limited_timestamps , - if(target_index > 0, arraySlice($session_ids, target_index), $session_ids) as filtered_$session_ids , - arraySlice(filtered_$session_ids, 1, 5) as limited_$session_ids , - if(target_index > 0, arraySlice($window_ids, target_index), $window_ids) as filtered_$window_ids , - arraySlice(filtered_$window_ids, 1, 5) as limited_$window_ids , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuids, limited_timestamps, limited_$session_ids, limited_$window_ids) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + arrayFilter((x, y)->y, $window_id, mapping) as $window_ids, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + if(target_index > 0, arraySlice(uuids, target_index), uuids) as filtered_uuids, + arraySlice(filtered_uuids, 1, 5) as limited_uuids, + if(target_index > 0, arraySlice(timestamps, target_index), timestamps) as filtered_timestamps, + arraySlice(filtered_timestamps, 1, 5) as limited_timestamps, + if(target_index > 0, arraySlice($session_ids, target_index), $session_ids) as filtered_$session_ids, + arraySlice(filtered_$session_ids, 1, 5) as limited_$session_ids, + if(target_index > 0, arraySlice($window_ids, target_index), $window_ids) as filtered_$window_ids, + arraySlice(filtered_$window_ids, 1, 5) as limited_$window_ids, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuids, limited_timestamps, limited_$session_ids, limited_$window_ids) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, path_time_tuple.4 as uuid, path_time_tuple.5 as timestamp, path_time_tuple.6 as $session_id, - path_time_tuple.7 as $window_id , - session_index , - arrayZip(paths, timing, arrayDifference(timing), uuids, timestamps, $session_ids, $window_ids) as paths_tuple , + path_time_tuple.7 as $window_id, + session_index, + arrayZip(paths, timing, arrayDifference(timing), uuids, timestamps, $session_ids, $window_ids) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -6331,7 +6300,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-02 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -6357,8 +6326,7 @@ # --- # name: TestClickhousePaths.test_recording_with_no_window_or_session_id ''' - - SELECT person_id AS actor_id , + SELECT person_id AS actor_id, groupUniqArray(100)((timestamp, uuid, "$session_id", "$window_id")) as matching_events @@ -6375,50 +6343,50 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, joined_path_tuple.4 as final_uuid, joined_path_tuple.5 as final_timestamp, joined_path_tuple.6 as final_$session_id, - joined_path_tuple.7 as final_$window_id , + joined_path_tuple.7 as final_$window_id, arrayFilter((x, y)->y, uuid, mapping) as uuids, arrayFilter((x, y)->y, timestamp, mapping) as timestamps, arrayFilter((x, y)->y, $session_id, mapping) as $session_ids, - arrayFilter((x, y)->y, $window_id, mapping) as $window_ids , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - if(target_index > 0, arraySlice(uuids, target_index), uuids) as filtered_uuids , - arraySlice(filtered_uuids, 1, 5) as limited_uuids , - if(target_index > 0, arraySlice(timestamps, target_index), timestamps) as filtered_timestamps , - arraySlice(filtered_timestamps, 1, 5) as limited_timestamps , - if(target_index > 0, arraySlice($session_ids, target_index), $session_ids) as filtered_$session_ids , - arraySlice(filtered_$session_ids, 1, 5) as limited_$session_ids , - if(target_index > 0, arraySlice($window_ids, target_index), $window_ids) as filtered_$window_ids , - arraySlice(filtered_$window_ids, 1, 5) as limited_$window_ids , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuids, limited_timestamps, limited_$session_ids, limited_$window_ids) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + arrayFilter((x, y)->y, $window_id, mapping) as $window_ids, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + if(target_index > 0, arraySlice(uuids, target_index), uuids) as filtered_uuids, + arraySlice(filtered_uuids, 1, 5) as limited_uuids, + if(target_index > 0, arraySlice(timestamps, target_index), timestamps) as filtered_timestamps, + arraySlice(filtered_timestamps, 1, 5) as limited_timestamps, + if(target_index > 0, arraySlice($session_ids, target_index), $session_ids) as filtered_$session_ids, + arraySlice(filtered_$session_ids, 1, 5) as limited_$session_ids, + if(target_index > 0, arraySlice($window_ids, target_index), $window_ids) as filtered_$window_ids, + arraySlice(filtered_$window_ids, 1, 5) as limited_$window_ids, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuids, limited_timestamps, limited_$session_ids, limited_$window_ids) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, path_time_tuple.4 as uuid, path_time_tuple.5 as timestamp, path_time_tuple.6 as $session_id, - path_time_tuple.7 as $window_id , - session_index , - arrayZip(paths, timing, arrayDifference(timing), uuids, timestamps, $session_ids, $window_ids) as paths_tuple , + path_time_tuple.7 as $window_id, + session_index, + arrayZip(paths, timing, arrayDifference(timing), uuids, timestamps, $session_ids, $window_ids) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -6459,7 +6427,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-02 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -6484,8 +6452,7 @@ # --- # name: TestClickhousePaths.test_recording_with_start_and_end ''' - - SELECT person_id AS actor_id , + SELECT person_id AS actor_id, groupUniqArray(100)((timestamp, uuid, "$session_id", "$window_id")) as matching_events @@ -6502,57 +6469,57 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, joined_path_tuple.4 as final_uuid, joined_path_tuple.5 as final_timestamp, joined_path_tuple.6 as final_$session_id, - joined_path_tuple.7 as final_$window_id , + joined_path_tuple.7 as final_$window_id, arrayFilter((x, y)->y, uuid, mapping) as uuids, arrayFilter((x, y)->y, timestamp, mapping) as timestamps, arrayFilter((x, y)->y, $session_id, mapping) as $session_ids, - arrayFilter((x, y)->y, $window_id, mapping) as $window_ids , - indexOf(compact_path, '/1') as start_target_index , - if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path , - if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings , - indexOf(start_filtered_path, '/3') as end_target_index , - if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path , - if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings , - if(length(filtered_path) > 5, arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_path) AS limited_path , - if(length(filtered_timings) > 5, arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[1+intDiv(5, 2)]], arraySlice(filtered_timings, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_timings) AS limited_timings , - if(start_target_index > 0, arraySlice(uuids, start_target_index), uuids) as start_filtered_uuids , - if(end_target_index > 0, arrayResize(start_filtered_uuids, end_target_index), start_filtered_uuids) as filtered_uuids , - if(length(filtered_uuids) > 5, arrayConcat(arraySlice(filtered_uuids, 1, intDiv(5, 2)), [filtered_uuids[1+intDiv(5, 2)]], arraySlice(filtered_uuids, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_uuids) AS limited_uuids , - if(start_target_index > 0, arraySlice(timestamps, start_target_index), timestamps) as start_filtered_timestamps , - if(end_target_index > 0, arrayResize(start_filtered_timestamps, end_target_index), start_filtered_timestamps) as filtered_timestamps , - if(length(filtered_timestamps) > 5, arrayConcat(arraySlice(filtered_timestamps, 1, intDiv(5, 2)), [filtered_timestamps[1+intDiv(5, 2)]], arraySlice(filtered_timestamps, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_timestamps) AS limited_timestamps , - if(start_target_index > 0, arraySlice($session_ids, start_target_index), $session_ids) as start_filtered_$session_ids , - if(end_target_index > 0, arrayResize(start_filtered_$session_ids, end_target_index), start_filtered_$session_ids) as filtered_$session_ids , - if(length(filtered_$session_ids) > 5, arrayConcat(arraySlice(filtered_$session_ids, 1, intDiv(5, 2)), [filtered_$session_ids[1+intDiv(5, 2)]], arraySlice(filtered_$session_ids, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_$session_ids) AS limited_$session_ids , - if(start_target_index > 0, arraySlice($window_ids, start_target_index), $window_ids) as start_filtered_$window_ids , - if(end_target_index > 0, arrayResize(start_filtered_$window_ids, end_target_index), start_filtered_$window_ids) as filtered_$window_ids , - if(length(filtered_$window_ids) > 5, arrayConcat(arraySlice(filtered_$window_ids, 1, intDiv(5, 2)), [filtered_$window_ids[1+intDiv(5, 2)]], arraySlice(filtered_$window_ids, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_$window_ids) AS limited_$window_ids , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuids, limited_timestamps, limited_$session_ids, limited_$window_ids) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + arrayFilter((x, y)->y, $window_id, mapping) as $window_ids, + indexOf(compact_path, '/1') as start_target_index, + if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path, + if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings, + indexOf(start_filtered_path, '/3') as end_target_index, + if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path, + if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings, + if(length(filtered_path) > 5, arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_path) AS limited_path, + if(length(filtered_timings) > 5, arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[1+intDiv(5, 2)]], arraySlice(filtered_timings, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_timings) AS limited_timings, + if(start_target_index > 0, arraySlice(uuids, start_target_index), uuids) as start_filtered_uuids, + if(end_target_index > 0, arrayResize(start_filtered_uuids, end_target_index), start_filtered_uuids) as filtered_uuids, + if(length(filtered_uuids) > 5, arrayConcat(arraySlice(filtered_uuids, 1, intDiv(5, 2)), [filtered_uuids[1+intDiv(5, 2)]], arraySlice(filtered_uuids, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_uuids) AS limited_uuids, + if(start_target_index > 0, arraySlice(timestamps, start_target_index), timestamps) as start_filtered_timestamps, + if(end_target_index > 0, arrayResize(start_filtered_timestamps, end_target_index), start_filtered_timestamps) as filtered_timestamps, + if(length(filtered_timestamps) > 5, arrayConcat(arraySlice(filtered_timestamps, 1, intDiv(5, 2)), [filtered_timestamps[1+intDiv(5, 2)]], arraySlice(filtered_timestamps, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_timestamps) AS limited_timestamps, + if(start_target_index > 0, arraySlice($session_ids, start_target_index), $session_ids) as start_filtered_$session_ids, + if(end_target_index > 0, arrayResize(start_filtered_$session_ids, end_target_index), start_filtered_$session_ids) as filtered_$session_ids, + if(length(filtered_$session_ids) > 5, arrayConcat(arraySlice(filtered_$session_ids, 1, intDiv(5, 2)), [filtered_$session_ids[1+intDiv(5, 2)]], arraySlice(filtered_$session_ids, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_$session_ids) AS limited_$session_ids, + if(start_target_index > 0, arraySlice($window_ids, start_target_index), $window_ids) as start_filtered_$window_ids, + if(end_target_index > 0, arrayResize(start_filtered_$window_ids, end_target_index), start_filtered_$window_ids) as filtered_$window_ids, + if(length(filtered_$window_ids) > 5, arrayConcat(arraySlice(filtered_$window_ids, 1, intDiv(5, 2)), [filtered_$window_ids[1+intDiv(5, 2)]], arraySlice(filtered_$window_ids, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_$window_ids) AS limited_$window_ids, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuids, limited_timestamps, limited_$session_ids, limited_$window_ids) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, path_time_tuple.4 as uuid, path_time_tuple.5 as timestamp, path_time_tuple.6 as $session_id, - path_time_tuple.7 as $window_id , - session_index , - arrayZip(paths, timing, arrayDifference(timing), uuids, timestamps, $session_ids, $window_ids) as paths_tuple , + path_time_tuple.7 as $window_id, + session_index, + arrayZip(paths, timing, arrayDifference(timing), uuids, timestamps, $session_ids, $window_ids) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -6593,7 +6560,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-02 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -6620,7 +6587,6 @@ # --- # name: TestClickhousePaths.test_respect_session_limits ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -6634,30 +6600,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -6688,7 +6654,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2023-05-23 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -6704,7 +6670,6 @@ # --- # name: TestClickhousePaths.test_start_and_end ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -6718,33 +6683,33 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, '/5') as start_target_index , - if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path , - if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings , - indexOf(start_filtered_path, '/about') as end_target_index , - if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path , - if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings , - if(length(filtered_path) > 5, arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_path) AS limited_path , - if(length(filtered_timings) > 5, arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[1+intDiv(5, 2)]], arraySlice(filtered_timings, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_timings) AS limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, '/5') as start_target_index, + if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path, + if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings, + indexOf(start_filtered_path, '/about') as end_target_index, + if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path, + if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings, + if(length(filtered_path) > 5, arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_path) AS limited_path, + if(length(filtered_timings) > 5, arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[1+intDiv(5, 2)]], arraySlice(filtered_timings, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_timings) AS limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -6775,7 +6740,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -6792,7 +6757,6 @@ # --- # name: TestClickhousePaths.test_start_and_end.1 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -6803,33 +6767,33 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, '/5') as start_target_index , - if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path , - if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings , - indexOf(start_filtered_path, '/about') as end_target_index , - if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path , - if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings , - if(length(filtered_path) > 5, arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_path) AS limited_path , - if(length(filtered_timings) > 5, arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[1+intDiv(5, 2)]], arraySlice(filtered_timings, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_timings) AS limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, '/5') as start_target_index, + if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path, + if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings, + indexOf(start_filtered_path, '/about') as end_target_index, + if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path, + if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings, + if(length(filtered_path) > 5, arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_path) AS limited_path, + if(length(filtered_timings) > 5, arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[1+intDiv(5, 2)]], arraySlice(filtered_timings, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_timings) AS limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -6860,7 +6824,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -6876,7 +6840,6 @@ # --- # name: TestClickhousePaths.test_start_and_end.2 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -6890,33 +6853,33 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, '/2') as start_target_index , - if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path , - if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings , - indexOf(start_filtered_path, '/about') as end_target_index , - if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path , - if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings , - if(length(filtered_path) > 4, arrayConcat(arraySlice(filtered_path, 1, intDiv(4, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_path) AS limited_path , - if(length(filtered_timings) > 4, arrayConcat(arraySlice(filtered_timings, 1, intDiv(4, 2)), [filtered_timings[1+intDiv(4, 2)]], arraySlice(filtered_timings, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_timings) AS limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, '/2') as start_target_index, + if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path, + if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings, + indexOf(start_filtered_path, '/about') as end_target_index, + if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path, + if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings, + if(length(filtered_path) > 4, arrayConcat(arraySlice(filtered_path, 1, intDiv(4, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_path) AS limited_path, + if(length(filtered_timings) > 4, arrayConcat(arraySlice(filtered_timings, 1, intDiv(4, 2)), [filtered_timings[1+intDiv(4, 2)]], arraySlice(filtered_timings, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_timings) AS limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -6947,7 +6910,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -6964,7 +6927,6 @@ # --- # name: TestClickhousePaths.test_start_and_end.3 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -6975,33 +6937,33 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, '/2') as start_target_index , - if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path , - if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings , - indexOf(start_filtered_path, '/about') as end_target_index , - if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path , - if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings , - if(length(filtered_path) > 4, arrayConcat(arraySlice(filtered_path, 1, intDiv(4, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_path) AS limited_path , - if(length(filtered_timings) > 4, arrayConcat(arraySlice(filtered_timings, 1, intDiv(4, 2)), [filtered_timings[1+intDiv(4, 2)]], arraySlice(filtered_timings, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_timings) AS limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, '/2') as start_target_index, + if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path, + if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings, + indexOf(start_filtered_path, '/about') as end_target_index, + if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path, + if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings, + if(length(filtered_path) > 4, arrayConcat(arraySlice(filtered_path, 1, intDiv(4, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_path) AS limited_path, + if(length(filtered_timings) > 4, arrayConcat(arraySlice(filtered_timings, 1, intDiv(4, 2)), [filtered_timings[1+intDiv(4, 2)]], arraySlice(filtered_timings, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_timings) AS limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -7032,7 +6994,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -7048,7 +7010,6 @@ # --- # name: TestClickhousePaths.test_start_and_end_materialized ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -7062,33 +7023,33 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, '/5') as start_target_index , - if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path , - if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings , - indexOf(start_filtered_path, '/about') as end_target_index , - if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path , - if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings , - if(length(filtered_path) > 5, arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_path) AS limited_path , - if(length(filtered_timings) > 5, arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[1+intDiv(5, 2)]], arraySlice(filtered_timings, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_timings) AS limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, '/5') as start_target_index, + if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path, + if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings, + indexOf(start_filtered_path, '/about') as end_target_index, + if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path, + if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings, + if(length(filtered_path) > 5, arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_path) AS limited_path, + if(length(filtered_timings) > 5, arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[1+intDiv(5, 2)]], arraySlice(filtered_timings, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_timings) AS limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -7119,7 +7080,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -7136,7 +7097,6 @@ # --- # name: TestClickhousePaths.test_start_and_end_materialized.1 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -7147,33 +7107,33 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, '/5') as start_target_index , - if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path , - if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings , - indexOf(start_filtered_path, '/about') as end_target_index , - if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path , - if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings , - if(length(filtered_path) > 5, arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_path) AS limited_path , - if(length(filtered_timings) > 5, arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[1+intDiv(5, 2)]], arraySlice(filtered_timings, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_timings) AS limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, '/5') as start_target_index, + if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path, + if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings, + indexOf(start_filtered_path, '/about') as end_target_index, + if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path, + if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings, + if(length(filtered_path) > 5, arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_path) AS limited_path, + if(length(filtered_timings) > 5, arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[1+intDiv(5, 2)]], arraySlice(filtered_timings, (-1)*intDiv(5, 2), intDiv(5, 2))), filtered_timings) AS limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -7204,7 +7164,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -7220,7 +7180,6 @@ # --- # name: TestClickhousePaths.test_start_and_end_materialized.2 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -7234,33 +7193,33 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, '/2') as start_target_index , - if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path , - if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings , - indexOf(start_filtered_path, '/about') as end_target_index , - if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path , - if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings , - if(length(filtered_path) > 4, arrayConcat(arraySlice(filtered_path, 1, intDiv(4, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_path) AS limited_path , - if(length(filtered_timings) > 4, arrayConcat(arraySlice(filtered_timings, 1, intDiv(4, 2)), [filtered_timings[1+intDiv(4, 2)]], arraySlice(filtered_timings, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_timings) AS limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, '/2') as start_target_index, + if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path, + if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings, + indexOf(start_filtered_path, '/about') as end_target_index, + if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path, + if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings, + if(length(filtered_path) > 4, arrayConcat(arraySlice(filtered_path, 1, intDiv(4, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_path) AS limited_path, + if(length(filtered_timings) > 4, arrayConcat(arraySlice(filtered_timings, 1, intDiv(4, 2)), [filtered_timings[1+intDiv(4, 2)]], arraySlice(filtered_timings, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_timings) AS limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -7291,7 +7250,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -7308,7 +7267,6 @@ # --- # name: TestClickhousePaths.test_start_and_end_materialized.3 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -7319,33 +7277,33 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, '/2') as start_target_index , - if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path , - if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings , - indexOf(start_filtered_path, '/about') as end_target_index , - if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path , - if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings , - if(length(filtered_path) > 4, arrayConcat(arraySlice(filtered_path, 1, intDiv(4, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_path) AS limited_path , - if(length(filtered_timings) > 4, arrayConcat(arraySlice(filtered_timings, 1, intDiv(4, 2)), [filtered_timings[1+intDiv(4, 2)]], arraySlice(filtered_timings, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_timings) AS limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, '/2') as start_target_index, + if(start_target_index > 0, arraySlice(compact_path, start_target_index), compact_path) as start_filtered_path, + if(start_target_index > 0, arraySlice(timings, start_target_index), timings) as start_filtered_timings, + indexOf(start_filtered_path, '/about') as end_target_index, + if(end_target_index > 0, arrayResize(start_filtered_path, end_target_index), start_filtered_path) as filtered_path, + if(end_target_index > 0, arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) as filtered_timings, + if(length(filtered_path) > 4, arrayConcat(arraySlice(filtered_path, 1, intDiv(4, 2)), ['...'], arraySlice(filtered_path, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_path) AS limited_path, + if(length(filtered_timings) > 4, arrayConcat(arraySlice(filtered_timings, 1, intDiv(4, 2)), [filtered_timings[1+intDiv(4, 2)]], arraySlice(filtered_timings, (-1)*intDiv(4, 2), intDiv(4, 2))), filtered_timings) AS limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -7376,7 +7334,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -7392,7 +7350,6 @@ # --- # name: TestClickhousePaths.test_start_dropping_orphaned_edges ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -7406,30 +7363,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, '/2') as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, '/2') as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -7460,7 +7417,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -7477,7 +7434,6 @@ # --- # name: TestClickhousePaths.test_step_conversion_times ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -7491,30 +7447,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -7547,7 +7503,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2023-05-23 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -7563,7 +7519,6 @@ # --- # name: TestClickhousePaths.test_step_limit ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -7577,30 +7532,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 2) as limited_path , - arraySlice(filtered_timings, 1, 2) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 2) as limited_path, + arraySlice(filtered_timings, 1, 2) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -7631,7 +7586,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -7647,7 +7602,6 @@ # --- # name: TestClickhousePaths.test_step_limit.1 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -7658,30 +7612,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 2) as limited_path , - arraySlice(filtered_timings, 1, 2) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 2) as limited_path, + arraySlice(filtered_timings, 1, 2) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -7712,7 +7666,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -7726,7 +7680,6 @@ # --- # name: TestClickhousePaths.test_step_limit.2 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -7737,30 +7690,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 2) as limited_path , - arraySlice(filtered_timings, 1, 2) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 2) as limited_path, + arraySlice(filtered_timings, 1, 2) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -7791,7 +7744,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -7805,7 +7758,6 @@ # --- # name: TestClickhousePaths.test_step_limit.3 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -7819,30 +7771,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 3) as limited_path , - arraySlice(filtered_timings, 1, 3) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 3) as limited_path, + arraySlice(filtered_timings, 1, 3) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -7873,7 +7825,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -7889,7 +7841,6 @@ # --- # name: TestClickhousePaths.test_step_limit.4 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -7900,30 +7851,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 3) as limited_path , - arraySlice(filtered_timings, 1, 3) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 3) as limited_path, + arraySlice(filtered_timings, 1, 3) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -7954,7 +7905,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -7968,7 +7919,6 @@ # --- # name: TestClickhousePaths.test_step_limit.5 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -7982,30 +7932,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -8036,7 +7986,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -8052,7 +8002,6 @@ # --- # name: TestClickhousePaths.test_step_limit.6 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -8063,30 +8012,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -8117,7 +8066,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -8131,7 +8080,6 @@ # --- # name: TestClickhousePaths.test_step_limit.7 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -8142,30 +8090,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -8196,7 +8144,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -8210,7 +8158,6 @@ # --- # name: TestClickhousePaths.test_step_limit.8 ''' - SELECT DISTINCT person_id AS actor_id FROM (SELECT person_id, @@ -8221,30 +8168,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -8275,7 +8222,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -8289,7 +8236,6 @@ # --- # name: TestClickhousePaths.test_team_and_local_path_cleaning_rules ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -8303,30 +8249,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -8361,7 +8307,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -8377,7 +8323,6 @@ # --- # name: TestClickhousePaths.test_team_and_local_path_cleaning_rules.1 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -8391,30 +8336,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -8449,7 +8394,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -8465,7 +8410,6 @@ # --- # name: TestClickhousePaths.test_team_and_local_path_cleaning_rules.2 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -8479,30 +8423,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -8537,7 +8481,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -8553,7 +8497,6 @@ # --- # name: TestClickhousePaths.test_team_path_cleaning_rules ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -8567,30 +8510,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -8623,7 +8566,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -8639,7 +8582,6 @@ # --- # name: TestClickhousePaths.test_team_path_cleaning_rules.1 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -8653,30 +8595,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -8710,7 +8652,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -8726,7 +8668,6 @@ # --- # name: TestClickhousePaths.test_wildcard_groups ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -8740,30 +8681,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -8794,7 +8735,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -8810,7 +8751,6 @@ # --- # name: TestClickhousePaths.test_wildcard_groups_across_people ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -8824,30 +8764,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 4) as limited_path , - arraySlice(filtered_timings, 1, 4) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 4) as limited_path, + arraySlice(filtered_timings, 1, 4) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -8880,7 +8820,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2023-05-23 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -8896,7 +8836,6 @@ # --- # name: TestClickhousePaths.test_wildcard_groups_and_min_edge_weight ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -8910,30 +8849,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -8964,7 +8903,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -8981,7 +8920,6 @@ # --- # name: TestClickhousePaths.test_wildcard_groups_and_min_edge_weight.1 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -8995,30 +8933,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -9049,7 +8987,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -9066,7 +9004,6 @@ # --- # name: TestClickhousePaths.test_wildcard_groups_and_min_edge_weight.2 ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -9080,30 +9017,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -9134,7 +9071,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -9152,7 +9089,6 @@ # --- # name: TestClickhousePaths.test_wildcard_groups_evil_input ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -9166,30 +9102,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -9222,7 +9158,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2023-05-23 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple, @@ -9238,7 +9174,6 @@ # --- # name: TestClickhousePaths.test_wildcard_groups_with_sampling ''' - SELECT last_path_key as source_event, path_key as target_event, COUNT(*) AS event_count, @@ -9252,30 +9187,30 @@ if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, path_dropoff_key FROM - (SELECT person_id , - joined_path_tuple.1 as path , - joined_path_tuple.2 as conversion_time , - joined_path_tuple.3 as prev_path , - event_in_session_index , - session_index , - arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , - arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , - arrayFilter((x, y) -> y, time, mapping) as timings , - arrayFilter((x, y)->y, path_basic, mapping) as compact_path , - indexOf(compact_path, NULL) as target_index , - if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , - if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , - arraySlice(filtered_path, 1, 5) as limited_path , - arraySlice(filtered_timings, 1, 5) as limited_timings , - arrayDifference(limited_timings) as timings_diff , - arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , - concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + (SELECT person_id, + joined_path_tuple.1 as path, + joined_path_tuple.2 as conversion_time, + joined_path_tuple.3 as prev_path, + event_in_session_index, + session_index, + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, + arrayFilter((x, y) -> y, time, mapping) as timings, + arrayFilter((x, y)->y, path_basic, mapping) as compact_path, + indexOf(compact_path, NULL) as target_index, + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path, + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings, + arraySlice(filtered_path, 1, 5) as limited_path, + arraySlice(filtered_timings, 1, 5) as limited_timings, + arrayDifference(limited_timings) as timings_diff, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings, + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */ FROM - (SELECT person_id , - path_time_tuple.1 as path_basic , - path_time_tuple.2 as time , - session_index , - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + (SELECT person_id, + path_time_tuple.1 as path_basic, + path_time_tuple.2 as time, + session_index, + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, arraySplit(x -> if(x.3 < 1800000, 0, 1), paths_tuple) as session_paths FROM (SELECT person_id, @@ -9306,7 +9241,7 @@ AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ORDER BY pdi.person_id, e.timestamp) - GROUP BY person_id) ARRAY + GROUP BY person_id )/* this array join splits paths for a single personID per session */ ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index) ARRAY JOIN limited_path_timings AS joined_path_tuple,