diff --git a/ee/api/test/__snapshots__/test_organization_resource_access.ambr b/ee/api/test/__snapshots__/test_organization_resource_access.ambr new file mode 100644 index 0000000000000..bf8927ee81e66 --- /dev/null +++ b/ee/api/test/__snapshots__/test_organization_resource_access.ambr @@ -0,0 +1,257 @@ +# serializer version: 1 +# name: TestOrganizationResourceAccessAPI.test_list_organization_resource_access_is_not_nplus1 + ''' + SELECT "posthog_user"."id", + "posthog_user"."password", + "posthog_user"."last_login", + "posthog_user"."first_name", + "posthog_user"."last_name", + "posthog_user"."is_staff", + "posthog_user"."date_joined", + "posthog_user"."uuid", + "posthog_user"."current_organization_id", + "posthog_user"."current_team_id", + "posthog_user"."email", + "posthog_user"."pending_email", + "posthog_user"."temporary_token", + "posthog_user"."distinct_id", + "posthog_user"."is_email_verified", + "posthog_user"."has_seen_product_intro_for", + "posthog_user"."strapi_id", + "posthog_user"."is_active", + "posthog_user"."theme_mode", + "posthog_user"."partial_notification_settings", + "posthog_user"."anonymize_data", + "posthog_user"."toolbar_mode", + "posthog_user"."hedgehog_config", + "posthog_user"."events_column_config", + "posthog_user"."email_opt_in" + FROM "posthog_user" + WHERE "posthog_user"."id" = 99999 + LIMIT 21 + ''' +# --- +# name: TestOrganizationResourceAccessAPI.test_list_organization_resource_access_is_not_nplus1.1 + ''' + SELECT "posthog_organization"."id", + "posthog_organization"."name", + "posthog_organization"."slug", + "posthog_organization"."logo_media_id", + "posthog_organization"."created_at", + "posthog_organization"."updated_at", + "posthog_organization"."plugins_access_level", + "posthog_organization"."for_internal_metrics", + "posthog_organization"."is_member_join_email_enabled", + "posthog_organization"."enforce_2fa", + "posthog_organization"."is_hipaa", + "posthog_organization"."customer_id", + "posthog_organization"."available_product_features", + "posthog_organization"."usage", + "posthog_organization"."never_drop_data", + "posthog_organization"."customer_trust_scores", + "posthog_organization"."setup_section_2_completed", + "posthog_organization"."personalization", + "posthog_organization"."domain_whitelist" + FROM "posthog_organization" + WHERE "posthog_organization"."id" = '00000000-0000-0000-0000-000000000000'::uuid + LIMIT 21 + ''' +# --- +# name: TestOrganizationResourceAccessAPI.test_list_organization_resource_access_is_not_nplus1.10 + ''' + SELECT 1 AS "a" + FROM "posthog_organizationmembership" + WHERE ("posthog_organizationmembership"."organization_id" = '00000000-0000-0000-0000-000000000000'::uuid + AND "posthog_organizationmembership"."user_id" = 99999) + LIMIT 1 + ''' +# --- +# name: TestOrganizationResourceAccessAPI.test_list_organization_resource_access_is_not_nplus1.11 + ''' + SELECT "posthog_organizationmembership"."id", + "posthog_organizationmembership"."organization_id", + "posthog_organizationmembership"."user_id", + "posthog_organizationmembership"."level", + "posthog_organizationmembership"."joined_at", + "posthog_organizationmembership"."updated_at" + FROM "posthog_organizationmembership" + WHERE ("posthog_organizationmembership"."organization_id" = '00000000-0000-0000-0000-000000000000'::uuid + AND "posthog_organizationmembership"."user_id" = 99999) + LIMIT 21 + ''' +# --- +# name: TestOrganizationResourceAccessAPI.test_list_organization_resource_access_is_not_nplus1.12 + ''' + SELECT COUNT(*) AS "__count" + FROM "ee_organizationresourceaccess" + WHERE "ee_organizationresourceaccess"."organization_id" = '00000000-0000-0000-0000-000000000000'::uuid + ''' +# --- +# name: TestOrganizationResourceAccessAPI.test_list_organization_resource_access_is_not_nplus1.13 + ''' + SELECT "ee_organizationresourceaccess"."id", + "ee_organizationresourceaccess"."resource", + "ee_organizationresourceaccess"."access_level", + "ee_organizationresourceaccess"."organization_id", + "ee_organizationresourceaccess"."created_by_id", + "ee_organizationresourceaccess"."created_at", + "ee_organizationresourceaccess"."updated_at" + FROM "ee_organizationresourceaccess" + WHERE "ee_organizationresourceaccess"."organization_id" = '00000000-0000-0000-0000-000000000000'::uuid + LIMIT 100 + ''' +# --- +# name: TestOrganizationResourceAccessAPI.test_list_organization_resource_access_is_not_nplus1.2 + ''' + SELECT "posthog_organization"."id", + "posthog_organization"."name", + "posthog_organization"."slug", + "posthog_organization"."logo_media_id", + "posthog_organization"."created_at", + "posthog_organization"."updated_at", + "posthog_organization"."plugins_access_level", + "posthog_organization"."for_internal_metrics", + "posthog_organization"."is_member_join_email_enabled", + "posthog_organization"."enforce_2fa", + "posthog_organization"."is_hipaa", + "posthog_organization"."customer_id", + "posthog_organization"."available_product_features", + "posthog_organization"."usage", + "posthog_organization"."never_drop_data", + "posthog_organization"."customer_trust_scores", + "posthog_organization"."setup_section_2_completed", + "posthog_organization"."personalization", + "posthog_organization"."domain_whitelist" + FROM "posthog_organization" + WHERE "posthog_organization"."id" = '00000000-0000-0000-0000-000000000000'::uuid + LIMIT 21 + ''' +# --- +# name: TestOrganizationResourceAccessAPI.test_list_organization_resource_access_is_not_nplus1.3 + ''' + SELECT 1 AS "a" + FROM "posthog_organizationmembership" + WHERE ("posthog_organizationmembership"."organization_id" = '00000000-0000-0000-0000-000000000000'::uuid + AND "posthog_organizationmembership"."user_id" = 99999) + LIMIT 1 + ''' +# --- +# name: TestOrganizationResourceAccessAPI.test_list_organization_resource_access_is_not_nplus1.4 + ''' + SELECT "posthog_organizationmembership"."id", + "posthog_organizationmembership"."organization_id", + "posthog_organizationmembership"."user_id", + "posthog_organizationmembership"."level", + "posthog_organizationmembership"."joined_at", + "posthog_organizationmembership"."updated_at" + FROM "posthog_organizationmembership" + WHERE ("posthog_organizationmembership"."organization_id" = '00000000-0000-0000-0000-000000000000'::uuid + AND "posthog_organizationmembership"."user_id" = 99999) + LIMIT 21 + ''' +# --- +# name: TestOrganizationResourceAccessAPI.test_list_organization_resource_access_is_not_nplus1.5 + ''' + SELECT COUNT(*) AS "__count" + FROM "ee_organizationresourceaccess" + WHERE "ee_organizationresourceaccess"."organization_id" = '00000000-0000-0000-0000-000000000000'::uuid + ''' +# --- +# name: TestOrganizationResourceAccessAPI.test_list_organization_resource_access_is_not_nplus1.6 + ''' + SELECT "ee_organizationresourceaccess"."id", + "ee_organizationresourceaccess"."resource", + "ee_organizationresourceaccess"."access_level", + "ee_organizationresourceaccess"."organization_id", + "ee_organizationresourceaccess"."created_by_id", + "ee_organizationresourceaccess"."created_at", + "ee_organizationresourceaccess"."updated_at" + FROM "ee_organizationresourceaccess" + WHERE "ee_organizationresourceaccess"."organization_id" = '00000000-0000-0000-0000-000000000000'::uuid + LIMIT 100 + ''' +# --- +# name: TestOrganizationResourceAccessAPI.test_list_organization_resource_access_is_not_nplus1.7 + ''' + SELECT "posthog_user"."id", + "posthog_user"."password", + "posthog_user"."last_login", + "posthog_user"."first_name", + "posthog_user"."last_name", + "posthog_user"."is_staff", + "posthog_user"."date_joined", + "posthog_user"."uuid", + "posthog_user"."current_organization_id", + "posthog_user"."current_team_id", + "posthog_user"."email", + "posthog_user"."pending_email", + "posthog_user"."temporary_token", + "posthog_user"."distinct_id", + "posthog_user"."is_email_verified", + "posthog_user"."has_seen_product_intro_for", + "posthog_user"."strapi_id", + "posthog_user"."is_active", + "posthog_user"."theme_mode", + "posthog_user"."partial_notification_settings", + "posthog_user"."anonymize_data", + "posthog_user"."toolbar_mode", + "posthog_user"."hedgehog_config", + "posthog_user"."events_column_config", + "posthog_user"."email_opt_in" + FROM "posthog_user" + WHERE "posthog_user"."id" = 99999 + LIMIT 21 + ''' +# --- +# name: TestOrganizationResourceAccessAPI.test_list_organization_resource_access_is_not_nplus1.8 + ''' + SELECT "posthog_organization"."id", + "posthog_organization"."name", + "posthog_organization"."slug", + "posthog_organization"."logo_media_id", + "posthog_organization"."created_at", + "posthog_organization"."updated_at", + "posthog_organization"."plugins_access_level", + "posthog_organization"."for_internal_metrics", + "posthog_organization"."is_member_join_email_enabled", + "posthog_organization"."enforce_2fa", + "posthog_organization"."is_hipaa", + "posthog_organization"."customer_id", + "posthog_organization"."available_product_features", + "posthog_organization"."usage", + "posthog_organization"."never_drop_data", + "posthog_organization"."customer_trust_scores", + "posthog_organization"."setup_section_2_completed", + "posthog_organization"."personalization", + "posthog_organization"."domain_whitelist" + FROM "posthog_organization" + WHERE "posthog_organization"."id" = '00000000-0000-0000-0000-000000000000'::uuid + LIMIT 21 + ''' +# --- +# name: TestOrganizationResourceAccessAPI.test_list_organization_resource_access_is_not_nplus1.9 + ''' + SELECT "posthog_organization"."id", + "posthog_organization"."name", + "posthog_organization"."slug", + "posthog_organization"."logo_media_id", + "posthog_organization"."created_at", + "posthog_organization"."updated_at", + "posthog_organization"."plugins_access_level", + "posthog_organization"."for_internal_metrics", + "posthog_organization"."is_member_join_email_enabled", + "posthog_organization"."enforce_2fa", + "posthog_organization"."is_hipaa", + "posthog_organization"."customer_id", + "posthog_organization"."available_product_features", + "posthog_organization"."usage", + "posthog_organization"."never_drop_data", + "posthog_organization"."customer_trust_scores", + "posthog_organization"."setup_section_2_completed", + "posthog_organization"."personalization", + "posthog_organization"."domain_whitelist" + FROM "posthog_organization" + WHERE "posthog_organization"."id" = '00000000-0000-0000-0000-000000000000'::uuid + LIMIT 21 + ''' +# --- diff --git a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr new file mode 100644 index 0000000000000..200f16b6117e7 --- /dev/null +++ b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr @@ -0,0 +1,785 @@ +# serializer version: 1 +# name: TestClickhouseFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings + ''' + WITH funnel_actors as + (SELECT aggregation_target AS actor_id, + final_matching_events as matching_events , timestamp, steps, + final_timestamp, + first_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) as step_0_matching_events, + groupArray(10)(step_1_matching_event) as step_1_matching_events, + groupArray(10)(final_matching_event) as final_matching_events , + argMax(latest_0, steps) as timestamp, + argMax(latest_1, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + ("latest_0", + "uuid_0", + "$session_id_0", + "$window_id_0") as step_0_matching_event, + ("latest_1", + "uuid_1", + "$session_id_1", + "$window_id_1") as step_1_matching_event, + if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event , + latest_0, + latest_1, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + ("latest_0", + "uuid_0", + "$session_id_0", + "$window_id_0") as step_0_matching_event, + ("latest_1", + "uuid_1", + "$session_id_1", + "$window_id_1") as step_1_matching_event, + if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + "uuid_0", + "$session_id_0", + "$window_id_0", + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + last_value("uuid_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "uuid_1", + last_value("$session_id_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$session_id_1", + last_value("$window_id_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$window_id_1" + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + e.uuid AS uuid, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = '$pageview', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(step_0 = 1, "uuid", null) as "uuid_0", + if(step_0 = 1, "$session_id", null) as "$session_id_0", + if(step_0 = 1, "$window_id", null) as "$window_id_0", + if(event = 'insight analyzed', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(step_1 = 1, "uuid", null) as "uuid_1", + if(step_1 = 1, "$session_id", null) as "$session_id_1", + if(step_1 = 1, "$window_id", null) as "$window_id_1" + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['$pageview', 'insight analyzed'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['$pageview', 'insight analyzed'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1) )) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps IN [1, 2] + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000), + toDateTime('2021-01-08 23:59:59', 'UTC') AS date_to, + toDateTime('2021-01-01 00:00:00', 'UTC') AS date_from, + 2 AS target_step, + ['$pageview', 'insight analyzed'] as funnel_step_names + SELECT actors.actor_id AS actor_id , + any(actors.matching_events) AS matching_events + FROM events AS event + JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON pdi.distinct_id = events.distinct_id + JOIN funnel_actors AS actors ON pdi.person_id = actors.actor_id + WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 99999 + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) + AND event.event NOT IN funnel_step_names + AND event.event = 'insight loaded' + AND actors.steps = target_step + GROUP BY actor_id + ORDER BY actor_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhouseFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings.1 + ''' + + SELECT DISTINCT session_id + FROM session_replay_events + WHERE team_id = 99999 + and session_id in ['s2'] + AND min_first_timestamp >= '2020-12-31 00:00:00' + AND max_last_timestamp <= '2021-01-09 23:59:59' + ''' +# --- +# name: TestClickhouseFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings.2 + ''' + WITH funnel_actors as + (SELECT aggregation_target AS actor_id, + final_matching_events as matching_events , timestamp, steps, + final_timestamp, + first_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) as step_0_matching_events, + groupArray(10)(step_1_matching_event) as step_1_matching_events, + groupArray(10)(step_2_matching_event) as step_2_matching_events, + groupArray(10)(final_matching_event) as final_matching_events , + argMax(latest_0, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time, + ("latest_0", + "uuid_0", + "$session_id_0", + "$window_id_0") as step_0_matching_event, + ("latest_1", + "uuid_1", + "$session_id_1", + "$window_id_1") as step_1_matching_event, + ("latest_2", + "uuid_2", + "$session_id_2", + "$window_id_2") as step_2_matching_event, + if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) as final_matching_event , + latest_0, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 14 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, + ("latest_0", + "uuid_0", + "$session_id_0", + "$window_id_0") as step_0_matching_event, + ("latest_1", + "uuid_1", + "$session_id_1", + "$window_id_1") as step_1_matching_event, + ("latest_2", + "uuid_2", + "$session_id_2", + "$window_id_2") as step_2_matching_event, + if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) as final_matching_event + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + "uuid_0", + "$session_id_0", + "$window_id_0", + step_1, + latest_1, + "uuid_1", + "$session_id_1", + "$window_id_1", + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2, + last_value("uuid_2") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "uuid_2", + last_value("$session_id_2") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$session_id_2", + last_value("$window_id_2") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$window_id_2" + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + "uuid_0", + "$session_id_0", + "$window_id_0", + step_1, + latest_1, + "uuid_1", + "$session_id_1", + "$window_id_1", + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2, + if(latest_2 < latest_1, NULL, "uuid_2") as "uuid_2", + if(latest_2 < latest_1, NULL, "$session_id_2") as "$session_id_2", + if(latest_2 < latest_1, NULL, "$window_id_2") as "$window_id_2" + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + "uuid_0", + "$session_id_0", + "$window_id_0", + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + last_value("uuid_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "uuid_1", + last_value("$session_id_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$session_id_1", + last_value("$window_id_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$window_id_1", + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2, + last_value("uuid_2") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "uuid_2", + last_value("$session_id_2") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$session_id_2", + last_value("$window_id_2") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$window_id_2" + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + e.uuid AS uuid, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = '$pageview', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(step_0 = 1, "uuid", null) as "uuid_0", + if(step_0 = 1, "$session_id", null) as "$session_id_0", + if(step_0 = 1, "$window_id", null) as "$window_id_0", + if(event = 'insight analyzed', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(step_1 = 1, "uuid", null) as "uuid_1", + if(step_1 = 1, "$session_id", null) as "$session_id_1", + if(step_1 = 1, "$window_id", null) as "$window_id_1", + if(event = 'insight updated', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + if(step_2 = 1, "uuid", null) as "uuid_2", + if(step_2 = 1, "$session_id", null) as "$session_id_2", + if(step_2 = 1, "$window_id", null) as "$window_id_2" + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['$pageview', 'insight analyzed', 'insight updated'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['$pageview', 'insight analyzed', 'insight updated'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps IN [1, 2, 3] + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000), + toDateTime('2021-01-08 23:59:59', 'UTC') AS date_to, + toDateTime('2021-01-01 00:00:00', 'UTC') AS date_from, + 3 AS target_step, + ['$pageview', 'insight analyzed', 'insight updated'] as funnel_step_names + SELECT actors.actor_id AS actor_id , + any(actors.matching_events) AS matching_events + FROM events AS event + JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON pdi.distinct_id = events.distinct_id + JOIN funnel_actors AS actors ON pdi.person_id = actors.actor_id + WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to + AND event.team_id = 99999 + AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp + AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) + AND event.event NOT IN funnel_step_names + AND event.event = 'insight loaded' + AND actors.steps <> target_step + GROUP BY actor_id + ORDER BY actor_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhouseFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings.3 + ''' + + SELECT DISTINCT session_id + FROM session_replay_events + WHERE team_id = 99999 + and session_id in ['s2'] + AND min_first_timestamp >= '2020-12-31 00:00:00' + AND max_last_timestamp <= '2021-01-09 23:59:59' + ''' +# --- +# name: TestClickhouseFunnelCorrelationsActors.test_funnel_correlation_on_properties_with_recordings + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id, + final_matching_events as matching_events , timestamp, steps, + final_timestamp, + first_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) as step_0_matching_events, + groupArray(10)(step_1_matching_event) as step_1_matching_events, + groupArray(10)(final_matching_event) as final_matching_events , + argMax(latest_0, steps) as timestamp, + argMax(latest_1, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + ("latest_0", + "uuid_0", + "$session_id_0", + "$window_id_0") as step_0_matching_event, + ("latest_1", + "uuid_1", + "$session_id_1", + "$window_id_1") as step_1_matching_event, + if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event , + latest_0, + latest_1, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + ("latest_0", + "uuid_0", + "$session_id_0", + "$window_id_0") as step_0_matching_event, + ("latest_1", + "uuid_1", + "$session_id_1", + "$window_id_1") as step_1_matching_event, + if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + "uuid_0", + "$session_id_0", + "$window_id_0", + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + last_value("uuid_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "uuid_1", + last_value("$session_id_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$session_id_1", + last_value("$window_id_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$window_id_1" + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + e.uuid AS uuid, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = '$pageview', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(step_0 = 1, "uuid", null) as "uuid_0", + if(step_0 = 1, "$session_id", null) as "$session_id_0", + if(step_0 = 1, "$window_id", null) as "$window_id_0", + if(event = 'insight analyzed', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(step_1 = 1, "uuid", null) as "uuid_1", + if(step_1 = 1, "$session_id", null) as "$session_id_1", + if(step_1 = 1, "$window_id", null) as "$window_id_1" + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['$pageview', 'insight analyzed'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + INNER JOIN + (SELECT id + FROM person + WHERE team_id = 99999 + AND id IN + (SELECT id + FROM person + WHERE team_id = 99999 + AND (has(['bar'], replaceRegexpAll(JSONExtractRaw(properties, 'foo'), '^"|"$', ''))) ) + GROUP BY id + HAVING max(is_deleted) = 0 + AND (has(['bar'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'foo'), '^"|"$', ''))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id + WHERE team_id = 99999 + AND event IN ['$pageview', 'insight analyzed'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1) )) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps IN [1, 2] + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000), + 2 AS target_step + SELECT funnel_actors.actor_id AS actor_id , + any(funnel_actors.matching_events) AS matching_events + FROM funnel_actors + WHERE funnel_actors.steps = target_step + GROUP BY funnel_actors.actor_id + ORDER BY actor_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhouseFunnelCorrelationsActors.test_funnel_correlation_on_properties_with_recordings.1 + ''' + + SELECT DISTINCT session_id + FROM session_replay_events + WHERE team_id = 99999 + and session_id in ['s2'] + AND min_first_timestamp >= '2020-12-31 00:00:00' + AND max_last_timestamp <= '2021-01-09 23:59:59' + ''' +# --- +# name: TestClickhouseFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id, + final_matching_events as matching_events , timestamp, steps, + final_timestamp, + first_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_1, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp, + groupArray(10)(step_0_matching_event) as step_0_matching_events, + groupArray(10)(step_1_matching_event) as step_1_matching_events, + groupArray(10)(final_matching_event) as final_matching_events + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time , + latest_0, + latest_1, + latest_0, + ("latest_0", + "uuid_0", + "$session_id_0", + "$window_id_0") as step_0_matching_event, + ("latest_1", + "uuid_1", + "$session_id_1", + "$window_id_1") as step_1_matching_event, + if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + ("latest_0", + "uuid_0", + "$session_id_0", + "$window_id_0") as step_0_matching_event, + ("latest_1", + "uuid_1", + "$session_id_1", + "$window_id_1") as step_1_matching_event, + if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + "uuid_0", + "$session_id_0", + "$window_id_0", + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) latest_1, + min("uuid_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) "uuid_1", + min("$session_id_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) "$session_id_1", + min("$window_id_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) "$window_id_1" + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + e.uuid AS uuid, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = '$pageview', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(step_0 = 1, "uuid", null) as "uuid_0", + if(step_0 = 1, "$session_id", null) as "$session_id_0", + if(step_0 = 1, "$window_id", null) as "$window_id_0", + if(event = 'insight analyzed', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(step_1 = 1, "uuid", null) as "uuid_1", + if(step_1 = 1, "$session_id", null) as "$session_id_1", + if(step_1 = 1, "$window_id", null) as "$window_id_1" + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + INNER JOIN + (SELECT id + FROM person + WHERE team_id = 99999 + AND id IN + (SELECT id + FROM person + WHERE team_id = 99999 + AND (has(['bar'], replaceRegexpAll(JSONExtractRaw(properties, 'foo'), '^"|"$', ''))) ) + GROUP BY id + HAVING max(is_deleted) = 0 + AND (has(['bar'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'foo'), '^"|"$', ''))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') + AND (1=1) )) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps IN [1, 2] + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000), + 2 AS target_step + SELECT funnel_actors.actor_id AS actor_id , + any(funnel_actors.matching_events) AS matching_events + FROM funnel_actors + WHERE funnel_actors.steps = target_step + GROUP BY funnel_actors.actor_id + ORDER BY actor_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhouseFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings.1 + ''' + + SELECT DISTINCT session_id + FROM session_replay_events + WHERE team_id = 99999 + and session_id in ['s2'] + AND min_first_timestamp >= '2020-12-31 00:00:00' + AND max_last_timestamp <= '2021-01-09 23:59:59' + ''' +# --- +# name: TestClickhouseFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings.2 + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id, + final_matching_events as matching_events , timestamp, steps, + final_timestamp, + first_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_1, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp, + groupArray(10)(step_0_matching_event) as step_0_matching_events, + groupArray(10)(step_1_matching_event) as step_1_matching_events, + groupArray(10)(final_matching_event) as final_matching_events + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time , + latest_0, + latest_1, + latest_0, + ("latest_0", + "uuid_0", + "$session_id_0", + "$window_id_0") as step_0_matching_event, + ("latest_1", + "uuid_1", + "$session_id_1", + "$window_id_1") as step_1_matching_event, + if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + ("latest_0", + "uuid_0", + "$session_id_0", + "$window_id_0") as step_0_matching_event, + ("latest_1", + "uuid_1", + "$session_id_1", + "$window_id_1") as step_1_matching_event, + if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + "uuid_0", + "$session_id_0", + "$window_id_0", + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) latest_1, + min("uuid_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) "uuid_1", + min("$session_id_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) "$session_id_1", + min("$window_id_1") over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) "$window_id_1" + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + e.uuid AS uuid, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = '$pageview', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(step_0 = 1, "uuid", null) as "uuid_0", + if(step_0 = 1, "$session_id", null) as "$session_id_0", + if(step_0 = 1, "$window_id", null) as "$window_id_0", + if(event = 'insight analyzed', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(step_1 = 1, "uuid", null) as "uuid_1", + if(step_1 = 1, "$session_id", null) as "$session_id_1", + if(step_1 = 1, "$window_id", null) as "$window_id_1" + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + INNER JOIN + (SELECT id + FROM person + WHERE team_id = 99999 + AND id IN + (SELECT id + FROM person + WHERE team_id = 99999 + AND (has(['bar'], replaceRegexpAll(JSONExtractRaw(properties, 'foo'), '^"|"$', ''))) ) + GROUP BY id + HAVING max(is_deleted) = 0 + AND (has(['bar'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'foo'), '^"|"$', ''))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') + AND (1=1) )) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps IN [1, 2] + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000), + 2 AS target_step + SELECT funnel_actors.actor_id AS actor_id , + any(funnel_actors.matching_events) AS matching_events + FROM funnel_actors + WHERE funnel_actors.steps <> target_step + GROUP BY funnel_actors.actor_id + ORDER BY actor_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhouseFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings.3 + ''' + + SELECT DISTINCT session_id + FROM session_replay_events + WHERE team_id = 99999 + and session_id in ['s3'] + AND min_first_timestamp >= '2020-12-31 00:00:00' + AND max_last_timestamp <= '2021-01-09 23:59:59' + ''' +# --- diff --git a/ee/clickhouse/queries/test/__snapshots__/test_paths.ambr b/ee/clickhouse/queries/test/__snapshots__/test_paths.ambr index ce881b241df4f..fd94135b2ee7a 100644 --- a/ee/clickhouse/queries/test/__snapshots__/test_paths.ambr +++ b/ee/clickhouse/queries/test/__snapshots__/test_paths.ambr @@ -1,4 +1,3611 @@ # serializer version: 1 +# name: TestClickhousePaths.test_by_funnel_after_dropoff + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id , timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_0, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps = 1 + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT last_path_key as source_event, + path_key as target_event, + COUNT(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.timestamp AS target_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp >= target_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE source_event IS NOT NULL + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event, + target_event + LIMIT 50 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_after_dropoff.1 + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id , timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_0, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps = 1 + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT DISTINCT person_id AS actor_id + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.timestamp AS target_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp >= target_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE last_path_key = '1_step one' + AND path_key = '2_step dropoff1' + ORDER BY person_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_after_dropoff.2 + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id , timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_0, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps = 1 + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT DISTINCT person_id AS actor_id + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.timestamp AS target_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp >= target_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE last_path_key = '2_step dropoff1' + AND path_key = '3_step dropoff2' + ORDER BY person_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_after_dropoff.3 + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id , timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_0, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps = 1 + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT DISTINCT person_id AS actor_id + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.timestamp AS target_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp >= target_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE last_path_key = '3_step dropoff2' + AND path_key = '4_step branch' + ORDER BY person_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_after_dropoff.4 + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id , timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_0, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps = 1 + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT DISTINCT person_id AS actor_id + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.timestamp AS target_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp >= target_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE last_path_key = '4_step branch' + AND path_key = '3_step dropoff2' + ORDER BY person_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_after_dropoff_with_group_filter + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id , timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_0, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps = 1 + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT last_path_key as source_event, + path_key as target_event, + COUNT(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.timestamp AS target_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 99999 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (has(['technology'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) + AND e.timestamp >= target_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE source_event IS NOT NULL + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event, + target_event + LIMIT 50 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_after_dropoff_with_group_filter.1 + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id , timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_0, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps = 1 + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT DISTINCT person_id AS actor_id + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.timestamp AS target_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 99999 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (has(['technology'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) + AND e.timestamp >= target_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE last_path_key = '1_step one' + AND path_key = '2_step dropoff1' + ORDER BY person_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_after_dropoff_with_group_filter.2 + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id , timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_0, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps = 1 + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT DISTINCT person_id AS actor_id + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.timestamp AS target_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 99999 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (has(['technology'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) + AND e.timestamp >= target_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE last_path_key = '2_step dropoff1' + AND path_key = '3_step dropoff2' + ORDER BY person_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_after_dropoff_with_group_filter.3 + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id , timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_0, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps = 1 + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT DISTINCT person_id AS actor_id + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.timestamp AS target_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 99999 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (has(['technology'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) + AND e.timestamp >= target_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE last_path_key = '3_step dropoff2' + AND path_key = '4_step branch' + ORDER BY person_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_after_dropoff_with_group_filter.4 + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id , timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_0, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_0, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps = 1 + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT DISTINCT person_id AS actor_id + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.timestamp AS target_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 99999 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (has(['technology'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) + AND e.timestamp >= target_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE last_path_key = '4_step branch' + AND path_key = '3_step dropoff2' + ORDER BY person_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_after_step + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id , timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_1, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_1, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps IN [2, 3] + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT last_path_key as source_event, + path_key as target_event, + COUNT(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.timestamp AS target_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp >= target_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE source_event IS NOT NULL + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event, + target_event + LIMIT 50 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_after_step_limit + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id , timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_1, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_1, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps IN [2, 3] + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT last_path_key as source_event, + path_key as target_event, + COUNT(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.timestamp AS target_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp >= target_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE source_event IS NOT NULL + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event, + target_event + LIMIT 50 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_before_dropoff + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id , timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_1, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_1, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps = 2 + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT last_path_key as source_event, + path_key as target_event, + COUNT(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.timestamp AS target_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp <= target_timestamp + INTERVAL 7 DAY + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE source_event IS NOT NULL + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event, + target_event + LIMIT 50 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_before_step + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id , timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_1, steps) as timestamp, + argMax(latest_2, steps) as final_timestamp, + argMax(latest_0, steps) as first_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_1, + latest_2, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps IN [2, 3] + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT last_path_key as source_event, + path_key as target_event, + COUNT(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.timestamp AS target_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp <= target_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE source_event IS NOT NULL + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event, + target_event + LIMIT 50 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_between_step + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id, + max_timestamp, + min_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_1, steps) as max_timestamp, + argMax(latest_0, steps) as min_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_1, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps IN [2, 3] + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT last_path_key as source_event, + path_key as target_event, + COUNT(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.min_timestamp as min_timestamp, + funnel_actors.max_timestamp as max_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp >= min_timestamp + AND e.timestamp <= max_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE source_event IS NOT NULL + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event, + target_event + LIMIT 50 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_between_step.1 + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id, + max_timestamp, + min_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_1, steps) as max_timestamp, + argMax(latest_0, steps) as min_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_1, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps IN [2, 3] + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT DISTINCT person_id AS actor_id + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.min_timestamp as min_timestamp, + funnel_actors.max_timestamp as max_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp >= min_timestamp + AND e.timestamp <= max_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE last_path_key = '1_step one' + AND path_key = '2_between_step_1_a' + ORDER BY person_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_between_step.2 + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id, + max_timestamp, + min_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_1, steps) as max_timestamp, + argMax(latest_0, steps) as min_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_1, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps IN [2, 3] + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT DISTINCT person_id AS actor_id + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.min_timestamp as min_timestamp, + funnel_actors.max_timestamp as max_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp >= min_timestamp + AND e.timestamp <= max_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE last_path_key = '2_between_step_1_a' + AND path_key = '3_between_step_1_b' + ORDER BY person_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_between_step.3 + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id, + max_timestamp, + min_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_1, steps) as max_timestamp, + argMax(latest_0, steps) as min_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_1, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps IN [2, 3] + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT DISTINCT person_id AS actor_id + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.min_timestamp as min_timestamp, + funnel_actors.max_timestamp as max_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp >= min_timestamp + AND e.timestamp <= max_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE last_path_key = '3_between_step_1_b' + AND path_key = '4_step two' + ORDER BY person_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_between_step.4 + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id, + max_timestamp, + min_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_1, steps) as max_timestamp, + argMax(latest_0, steps) as min_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_1, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps IN [2, 3] + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT DISTINCT person_id AS actor_id + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.min_timestamp as min_timestamp, + funnel_actors.max_timestamp as max_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp >= min_timestamp + AND e.timestamp <= max_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE last_path_key = '3_between_step_1_b' + AND path_key = '4_between_step_1_c' + ORDER BY person_id + LIMIT 100 + OFFSET 0 + ''' +# --- +# name: TestClickhousePaths.test_by_funnel_between_step.5 + ''' + WITH funnel_actors AS + (SELECT aggregation_target AS actor_id, + max_timestamp, + min_timestamp + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + argMax(latest_1, steps) as max_timestamp, + argMax(latest_0, steps) as min_timestamp + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + latest_1, + latest_0 + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 99999 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max(max_steps)) + WHERE steps IN [2, 3] + ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000) + SELECT DISTINCT person_id AS actor_id + FROM + (SELECT person_id, + path, + conversion_time, + event_in_session_index, + concat(toString(event_in_session_index), '_', path) as path_key, + if(event_in_session_index > 1, concat(toString(event_in_session_index-1), '_', prev_path), null) AS last_path_key, + path_dropoff_key + FROM + (SELECT person_id , + joined_path_tuple.1 as path , + joined_path_tuple.2 as conversion_time , + joined_path_tuple.3 as prev_path , + event_in_session_index , + session_index , + arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0 , + arrayMap((x, y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping , + arrayFilter((x, y) -> y, time, mapping) as timings , + arrayFilter((x, y)->y, path_basic, mapping) as compact_path , + indexOf(compact_path, NULL) as target_index , + if(target_index > 0, arraySlice(compact_path, target_index), compact_path) as filtered_path , + if(target_index > 0, arraySlice(timings, target_index), timings) as filtered_timings , + arraySlice(filtered_path, 1, 5) as limited_path , + arraySlice(filtered_timings, 1, 5) as limited_timings , + arrayDifference(limited_timings) as timings_diff , + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings , + concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key + FROM + (SELECT person_id , + path_time_tuple.1 as path_basic , + path_time_tuple.2 as time , + session_index , + arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple , + arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(x.3 / 1000) < toDateTime('2018-01-01') + INTERVAL 7 DAY, 0, 1), paths_tuple) as session_paths + FROM + (SELECT person_id, + groupArray(toUnixTimestamp64Milli(timestamp)) as timing, + groupArray(path_item) as paths + FROM + (SELECT e.timestamp AS timestamp, + if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, + funnel_actors.min_timestamp as min_timestamp, + funnel_actors.max_timestamp as max_timestamp, + ifNull(if(equals(event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), event)), '') AS path_item_ungrouped, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + if(group_index > 0, NULL[group_index], path_item_ungrouped) AS path_item + FROM events e + LEFT OUTER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 99999 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + JOIN funnel_actors ON funnel_actors.actor_id = if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) + WHERE team_id = 99999 + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND e.timestamp >= min_timestamp + AND e.timestamp <= max_timestamp + ORDER BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id), + e.timestamp) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE last_path_key = '4_between_step_1_c' + AND path_key = '5_step two' + ORDER BY person_id + LIMIT 100 + OFFSET 0 + ''' +# --- # name: TestClickhousePaths.test_end '''