From 2bd43f20e6032a315d00007bb89d248b538e184e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Oberm=C3=BCller?= Date: Tue, 12 Mar 2024 16:07:57 +0100 Subject: [PATCH] feat(hogql): add funnel correlation actor queries (#20726) --- frontend/src/queries/schema.json | 50 +- frontend/src/queries/schema.ts | 18 +- .../src/scenes/funnels/FunnelLineGraph.tsx | 2 +- .../scenes/funnels/funnelCorrelationLogic.ts | 4 +- .../scenes/funnels/funnelPersonsModalLogic.ts | 164 +- .../funnels/funnelPropertyCorrelationLogic.ts | 2 +- .../scenes/saved-insights/SavedInsights.tsx | 12 + .../trends/persons-modal/personsModalLogic.ts | 4 +- .../funnel_correlation_query_runner.py | 128 + .../test_funnel_correlation.ambr | 5358 +++++++++++++++-- .../test_funnel_correlations_persons.ambr | 776 +++ .../funnels/test/test_funnel_correlation.py | 408 +- .../test/test_funnel_correlations_persons.py | 647 ++ .../hogql_queries/insights/funnels/utils.py | 17 +- .../insights/insight_actors_query_runner.py | 21 +- posthog/hogql_queries/query_runner.py | 6 +- posthog/schema.py | 41 +- posthog/types.py | 3 +- 18 files changed, 6807 insertions(+), 854 deletions(-) create mode 100644 posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr create mode 100644 posthog/hogql_queries/insights/funnels/test/test_funnel_correlations_persons.py diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index daa02a390600f..250b62893c602 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -120,6 +120,9 @@ { "$ref": "#/definitions/FunnelsActorsQuery" }, + { + "$ref": "#/definitions/FunnelCorrelationActorsQuery" + }, { "$ref": "#/definitions/HogQLQuery" } @@ -1564,6 +1567,38 @@ "enum": ["second", "minute", "hour", "day", "week", "month"], "type": "string" }, + "FunnelCorrelationActorsQuery": { + "additionalProperties": false, + "properties": { + "funnelCorrelationPersonConverted": { + "type": "boolean" + }, + "funnelCorrelationPersonEntity": { + "$ref": "#/definitions/AnyEntityNode" + }, + "funnelCorrelationPropertyValues": { + "items": { + "$ref": "#/definitions/AnyPropertyFilter" + }, + "type": "array" + }, + "includeRecordings": { + "type": "boolean" + }, + "kind": { + "const": "FunnelCorrelationActorsQuery", + "type": "string" + }, + "response": { + "$ref": "#/definitions/ActorsQueryResponse" + }, + "source": { + "$ref": "#/definitions/FunnelCorrelationQuery" + } + }, + "required": ["kind", "source"], + "type": "object" + }, "FunnelCorrelationQuery": { "additionalProperties": false, "properties": { @@ -1643,9 +1678,7 @@ "type": "array" }, "types": { - "items": { - "type": "string" - }, + "items": {}, "type": "array" } }, @@ -1983,7 +2016,7 @@ "type": "boolean" }, "kind": { - "const": "InsightActorsQuery", + "const": "FunnelsActorsQuery", "type": "string" }, "response": { @@ -2663,6 +2696,9 @@ }, { "$ref": "#/definitions/FunnelsActorsQuery" + }, + { + "$ref": "#/definitions/FunnelCorrelationActorsQuery" } ] } @@ -3118,6 +3154,8 @@ "HogQLMetadata", "HogQLAutocomplete", "ActorsQuery", + "FunnelsActorsQuery", + "FunnelCorrelationActorsQuery", "SessionsTimelineQuery", "DataTableNode", "DataVisualizationNode", @@ -4266,9 +4304,7 @@ "type": "array" }, "types": { - "items": { - "type": "string" - }, + "items": {}, "type": "array" } }, diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 7b9920ca3b8d8..831bb80589d78 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -53,6 +53,8 @@ export enum NodeKind { HogQLMetadata = 'HogQLMetadata', HogQLAutocomplete = 'HogQLAutocomplete', ActorsQuery = 'ActorsQuery', + FunnelsActorsQuery = 'FunnelsActorsQuery', + FunnelCorrelationActorsQuery = 'FunnelCorrelationActorsQuery', SessionsTimelineQuery = 'SessionsTimelineQuery', // Interface nodes @@ -921,7 +923,7 @@ export interface ActorsQueryResponse { export interface ActorsQuery extends DataNode { kind: NodeKind.ActorsQuery - source?: InsightActorsQuery | FunnelsActorsQuery | HogQLQuery + source?: InsightActorsQuery | FunnelsActorsQuery | FunnelCorrelationActorsQuery | HogQLQuery select?: HogQLExpression[] search?: string properties?: AnyPropertyFilter[] @@ -1090,7 +1092,7 @@ export interface InsightActorsQuery @@ -1127,7 +1137,7 @@ export interface FunnelCorrelationResult { export interface FunnelCorrelationResponse { results: FunnelCorrelationResult columns?: any[] - types?: string[] + types?: any[] hogql?: string timings?: QueryTiming[] hasMore?: boolean @@ -1188,7 +1198,7 @@ export interface InsightActorsQueryOptionsResponse { export interface InsightActorsQueryOptions { kind: NodeKind.InsightActorsQueryOptions - source: InsightActorsQuery | FunnelsActorsQuery + source: InsightActorsQuery | FunnelsActorsQuery | FunnelCorrelationActorsQuery response?: InsightActorsQueryOptionsResponse } diff --git a/frontend/src/scenes/funnels/FunnelLineGraph.tsx b/frontend/src/scenes/funnels/FunnelLineGraph.tsx index ee879703a2b67..12a7aa57f0404 100644 --- a/frontend/src/scenes/funnels/FunnelLineGraph.tsx +++ b/frontend/src/scenes/funnels/FunnelLineGraph.tsx @@ -91,7 +91,7 @@ export function FunnelLineGraph({ if (hogQLInsightsFunnelsFlagEnabled) { const query: FunnelsActorsQuery = { - kind: NodeKind.InsightActorsQuery, + kind: NodeKind.FunnelsActorsQuery, source: querySource, funnelTrendsDropOff: false, funnelTrendsEntrancePeriodStart: dayjs(day).format('YYYY-MM-DD HH:mm:ss'), diff --git a/frontend/src/scenes/funnels/funnelCorrelationLogic.ts b/frontend/src/scenes/funnels/funnelCorrelationLogic.ts index b56242eed27ea..d76e569d1f2d6 100644 --- a/frontend/src/scenes/funnels/funnelCorrelationLogic.ts +++ b/frontend/src/scenes/funnels/funnelCorrelationLogic.ts @@ -48,7 +48,7 @@ export const funnelCorrelationLogic = kea([ try { if (values.hogQLInsightsFunnelsFlagEnabled) { const actorsQuery: FunnelsActorsQuery = { - kind: NodeKind.InsightActorsQuery, + kind: NodeKind.FunnelsActorsQuery, source: values.querySource!, } const query: FunnelCorrelationQuery = { @@ -93,7 +93,7 @@ export const funnelCorrelationLogic = kea([ loadEventWithPropertyCorrelations: async (eventName: string) => { if (values.hogQLInsightsFunnelsFlagEnabled) { const actorsQuery: FunnelsActorsQuery = { - kind: NodeKind.InsightActorsQuery, + kind: NodeKind.FunnelsActorsQuery, source: values.querySource!, } const query: FunnelCorrelationQuery = { diff --git a/frontend/src/scenes/funnels/funnelPersonsModalLogic.ts b/frontend/src/scenes/funnels/funnelPersonsModalLogic.ts index f30f8ee2a4ad9..affc4dde01414 100644 --- a/frontend/src/scenes/funnels/funnelPersonsModalLogic.ts +++ b/frontend/src/scenes/funnels/funnelPersonsModalLogic.ts @@ -1,16 +1,26 @@ import { actions, connect, kea, key, listeners, path, props, selectors } from 'kea' +import { elementsToAction } from 'scenes/events/createActionFromEvent' import { insightLogic } from 'scenes/insights/insightLogic' import { keyForInsightLogicProps } from 'scenes/insights/sharedUtils' import { funnelTitle } from 'scenes/trends/persons-modal/persons-modal-utils' import { openPersonsModal } from 'scenes/trends/persons-modal/PersonsModal' -import { FunnelsActorsQuery, NodeKind } from '~/queries/schema' import { + EventsNode, + FunnelCorrelationActorsQuery, + FunnelCorrelationQuery, + FunnelsActorsQuery, + NodeKind, +} from '~/queries/schema' +import { + AnyPropertyFilter, FunnelCorrelation, FunnelCorrelationResultsType, FunnelStep, FunnelStepWithConversionMetrics, InsightLogicProps, + PropertyFilterType, + PropertyOperator, } from '~/types' import { funnelDataLogic } from './funnelDataLogic' @@ -99,7 +109,7 @@ export const funnelPersonsModalLogic = kea([ // openPersonsModalForStep is for the baseline - for breakdown series use openPersonsModalForSeries if (values.hogQLInsightsFunnelsFlagEnabled) { const query: FunnelsActorsQuery = { - kind: NodeKind.InsightActorsQuery, + kind: NodeKind.FunnelsActorsQuery, source: values.querySource!, funnelStep: converted ? stepNo : -stepNo, includeRecordings: true, @@ -131,7 +141,7 @@ export const funnelPersonsModalLogic = kea([ // Version of openPersonsModalForStep that accurately handles breakdown series if (values.hogQLInsightsFunnelsFlagEnabled) { const query: FunnelsActorsQuery = { - kind: NodeKind.InsightActorsQuery, + kind: NodeKind.FunnelsActorsQuery, source: values.querySource!, funnelStep: converted ? stepNo : -stepNo, funnelStepBreakdown: series.breakdown_value === 'Baseline' ? null : series.breakdown_value, @@ -152,26 +162,142 @@ export const funnelPersonsModalLogic = kea([ if (correlation.result_type === FunnelCorrelationResultsType.Properties) { const { breakdown, breakdown_value } = parseBreakdownValue(correlation.event.event) - openPersonsModal({ - url: success ? correlation.success_people_url : correlation.failure_people_url, - title: funnelTitle({ - converted: success, - step: values.steps.length, - breakdown_value, - label: breakdown, - }), + const title = funnelTitle({ + converted: success, + step: values.steps.length, + breakdown_value, + label: breakdown, }) + + if (values.hogQLInsightsFunnelsFlagEnabled) { + // properties + const [propertyName, propertyValue] = correlation.event.event.split('::') + const propType = values.querySource?.aggregation_group_type_index ? 'group' : 'person' + const actorsQuery: FunnelsActorsQuery = { + kind: NodeKind.FunnelsActorsQuery, + source: values.querySource!, + funnelStep: 1, + includeRecordings: true, + } + const correlationQuery: FunnelCorrelationQuery = { + kind: NodeKind.FunnelCorrelationQuery, + source: actorsQuery, + funnelCorrelationType: correlation.result_type, + } + const query: FunnelCorrelationActorsQuery = { + kind: NodeKind.FunnelCorrelationActorsQuery, + source: correlationQuery, + funnelCorrelationPersonConverted: success, + funnelCorrelationPropertyValues: [ + { + key: propertyName, + value: propertyValue, + type: propType === 'person' ? PropertyFilterType.Person : PropertyFilterType.Group, + operator: PropertyOperator.Exact, + group_type_index: values.querySource?.aggregation_group_type_index, + }, + ], + } + openPersonsModal({ + title, + query, + additionalSelect: { matched_recordings: 'matched_recordings' }, + }) + } else { + openPersonsModal({ + url: success ? correlation.success_people_url : correlation.failure_people_url, + title, + }) + } } else { const { name } = parseEventAndProperty(correlation.event) - - openPersonsModal({ - url: success ? correlation.success_people_url : correlation.failure_people_url, - title: funnelTitle({ - converted: success, - step: values.steps.length, - label: name, - }), + const title = funnelTitle({ + converted: success, + step: values.steps.length, + label: name, }) + + if (values.hogQLInsightsFunnelsFlagEnabled) { + const actorsQuery: FunnelsActorsQuery = { + kind: NodeKind.FunnelsActorsQuery, + source: values.querySource!, + funnelStep: 1, + includeRecordings: true, + } + const correlationQuery: FunnelCorrelationQuery = { + kind: NodeKind.FunnelCorrelationQuery, + source: actorsQuery, + funnelCorrelationType: correlation.result_type, + } + + let entity: EventsNode + if (correlation.result_type === FunnelCorrelationResultsType.Events) { + // events + entity = { + event: correlation.event.event, + kind: NodeKind.EventsNode, + } + } else { + // events with properties + const eventName = correlation.event.event.split('::')[0] + + let properties: AnyPropertyFilter[] + if (eventName === '$autocapture') { + // If we have an $autocapture event, we need to + // convert the `elements` chain into an "action". + const elements = correlation.event.elements + const elementsAsAction = elementsToAction(elements) + properties = Object.entries(elementsAsAction) + .map(([propertyKey, propertyValue]) => { + if (propertyValue !== null) { + return { + key: propertyKey, + value: [propertyValue], + type: 'element', + operator: 'exact', + } + } + }) + .filter(Boolean) as AnyPropertyFilter[] + } else { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + const [_, propertyName, propertyValue] = correlation.event.event.split('::') + + properties = [ + { + key: propertyName, + value: propertyValue, + type: PropertyFilterType.Event, + operator: PropertyOperator.Exact, + }, + ] + } + + entity = { + kind: NodeKind.EventsNode, + event: eventName, + properties, + } + } + + const query: FunnelCorrelationActorsQuery = { + kind: NodeKind.FunnelCorrelationActorsQuery, + source: correlationQuery, + funnelCorrelationPersonConverted: success, + funnelCorrelationPersonEntity: entity, + } + + openPersonsModal({ + title, + query, + additionalSelect: { matched_recordings: 'matched_recordings' }, + }) + } else { + openPersonsModal({ + url: success ? correlation.success_people_url : correlation.failure_people_url, + title, + }) + } } }, })), diff --git a/frontend/src/scenes/funnels/funnelPropertyCorrelationLogic.ts b/frontend/src/scenes/funnels/funnelPropertyCorrelationLogic.ts index 6709c1be1dc7e..a12aa0ca2ec08 100644 --- a/frontend/src/scenes/funnels/funnelPropertyCorrelationLogic.ts +++ b/frontend/src/scenes/funnels/funnelPropertyCorrelationLogic.ts @@ -77,7 +77,7 @@ export const funnelPropertyCorrelationLogic = kea = { icon: IconPerson, inMenu: false, }, + [NodeKind.FunnelsActorsQuery]: { + name: 'Persons', + description: 'List of persons matching specified conditions, derived from an insight', + icon: IconPerson, + inMenu: false, + }, + [NodeKind.FunnelCorrelationActorsQuery]: { + name: 'Persons', + description: 'List of persons matching specified conditions, derived from an insight', + icon: IconPerson, + inMenu: false, + }, [NodeKind.DataTableNode]: { name: 'Data table', description: 'Slice and dice your data in a table', diff --git a/frontend/src/scenes/trends/persons-modal/personsModalLogic.ts b/frontend/src/scenes/trends/persons-modal/personsModalLogic.ts index df5270fb2c2dd..dbe81266fb7db 100644 --- a/frontend/src/scenes/trends/persons-modal/personsModalLogic.ts +++ b/frontend/src/scenes/trends/persons-modal/personsModalLogic.ts @@ -14,6 +14,8 @@ import { query as performQuery } from '~/queries/query' import { ActorsQuery, DataTableNode, + FunnelCorrelationActorsQuery, + FunnelsActorsQuery, InsightActorsQuery, InsightActorsQueryOptions, InsightActorsQueryOptionsResponse, @@ -35,7 +37,7 @@ import type { personsModalLogicType } from './personsModalLogicType' const RESULTS_PER_PAGE = 100 export interface PersonModalLogicProps { - query?: InsightActorsQuery | null + query?: InsightActorsQuery | FunnelsActorsQuery | FunnelCorrelationActorsQuery | null url?: string | null additionalSelect?: Partial> orderBy?: string[] diff --git a/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py b/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py index d97e872fd2c5e..945a1b7da5cce 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py +++ b/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py @@ -4,6 +4,7 @@ from posthog.constants import AUTOCAPTURE_EVENT from posthog.hogql.parser import parse_select +from posthog.hogql.property import property_to_expr from posthog.hogql_queries.insights.funnels.funnel_event_query import FunnelEventQuery from posthog.hogql_queries.insights.funnels.funnel_persons import FunnelActors from posthog.hogql_queries.insights.funnels.funnel_strict_persons import FunnelStrictActors @@ -29,6 +30,7 @@ CorrelationType, EventDefinition, EventsNode, + FunnelCorrelationActorsQuery, FunnelCorrelationQuery, FunnelCorrelationResponse, FunnelCorrelationResult, @@ -87,6 +89,7 @@ class FunnelCorrelationQueryRunner(QueryRunner): query_type = FunnelCorrelationQuery funnels_query: FunnelsQuery actors_query: FunnelsActorsQuery + correlation_actors_query: Optional[FunnelCorrelationActorsQuery] _funnel_actors_generator: FunnelActors | FunnelStrictActors | FunnelUnorderedActors @@ -335,6 +338,131 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: return self.get_event_query() + def to_actors_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: + assert self.correlation_actors_query is not None + + if self.query.funnelCorrelationType == FunnelCorrelationResultsType.properties: + # Filtering on persons / groups properties can be pushed down to funnel events query + if ( + self.correlation_actors_query.funnelCorrelationPropertyValues + and len(self.correlation_actors_query.funnelCorrelationPropertyValues) > 0 + ): + self.context.query.properties = [ + *(self.context.query.properties or []), + *self.correlation_actors_query.funnelCorrelationPropertyValues, + ] + return self.properties_actor_query() + else: + return self.events_actor_query() + + def events_actor_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: + assert self.correlation_actors_query is not None + + if not self.correlation_actors_query.funnelCorrelationPersonEntity: + raise ValidationError("No entity for persons specified") + + assert isinstance(self.correlation_actors_query.funnelCorrelationPersonEntity, EventsNode) + + target_step = self.context.max_steps + target_event = self.correlation_actors_query.funnelCorrelationPersonEntity.event + funnel_step_names = self._get_funnel_step_names() + funnel_persons_query = self.get_funnel_actors_cte() + funnel_event_query = FunnelEventQuery(context=self.context) + date_from = funnel_event_query._date_range().date_from_as_hogql() + date_to = funnel_event_query._date_range().date_to_as_hogql() + + properties = self.correlation_actors_query.funnelCorrelationPersonEntity.properties + prop_query = None + if properties is not None and properties != []: + prop_query = property_to_expr(properties, self.team) + + conversion_filter = ( + f'AND funnel_actors.steps {"=" if self.correlation_actors_query.funnelCorrelationPersonConverted else "<>"} target_step' + if self.correlation_actors_query.funnelCorrelationPersonConverted is not None + else "" + ) + + event_join_query = self._get_events_join_query() + + recording_event_select_statement = ( + ", any(funnel_actors.matching_events) AS matching_events" if self.actors_query.includeRecordings else "" + ) + + query = parse_select( + f""" + WITH + funnel_actors as ( + {{funnel_persons_query}} + ), + {{date_from}} AS date_from, + {{date_to}} AS date_to, + {target_step} AS target_step, + {funnel_step_names} AS funnel_step_names + SELECT + funnel_actors.actor_id AS actor_id + {recording_event_select_statement} + FROM events AS event + {event_join_query} + AND event.event = '{target_event}' + {conversion_filter} + GROUP BY actor_id + ORDER BY actor_id + """, + placeholders={ + "funnel_persons_query": funnel_persons_query, + "date_from": date_from, + "date_to": date_to, + }, + ) + + if prop_query: + assert isinstance(query, ast.SelectQuery) + assert isinstance(query.where, ast.And) + query.where.exprs = [*query.where.exprs, prop_query] + + return query + + def properties_actor_query( + self, + ) -> ast.SelectQuery | ast.SelectUnionQuery: + assert self.correlation_actors_query is not None + + if not self.correlation_actors_query.funnelCorrelationPropertyValues: + raise ValidationError("Property Correlation expects atleast one Property to get persons for") + + target_step = self.context.max_steps + funnel_persons_query = self.get_funnel_actors_cte() + + conversion_filter = ( + f'funnel_actors.steps {"=" if self.correlation_actors_query.funnelCorrelationPersonConverted else "<>"} target_step' + if self.correlation_actors_query.funnelCorrelationPersonConverted is not None + else "" + ) + + recording_event_select_statement = ( + ", any(funnel_actors.matching_events) AS matching_events" if self.actors_query.includeRecordings else "" + ) + + query = parse_select( + f""" + WITH + funnel_actors as ( + {{funnel_persons_query}} + ), + {target_step} AS target_step + SELECT + funnel_actors.actor_id AS actor_id + {recording_event_select_statement} + FROM funnel_actors + WHERE {conversion_filter} + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id + """, + placeholders={"funnel_persons_query": funnel_persons_query}, + ) + + return query + def get_event_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: funnel_persons_query = self.get_funnel_actors_cte() event_join_query = self._get_events_join_query() diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr index 4efa424dd1c75..5019f0f57f7b7 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr @@ -306,17 +306,23 @@ allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties_materialized +# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.1 ''' - SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, - countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, - countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count + SELECT persons.id, + persons.id AS id, + source.matching_events AS matching_events FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN (SELECT funnel_actors.actor_id AS actor_id, - funnel_actors.steps AS steps, - arrayJoin(arrayZip(['$browser'], [JSONExtractString(persons.person_props, '$browser')])) AS prop + any(funnel_actors.matching_events) AS matching_events FROM (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, @@ -326,6 +332,9 @@ steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp @@ -334,6 +343,9 @@ steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 @@ -342,34 +354,70 @@ timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` FROM (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e__pdi.person_id AS aggregation_target, + e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` FROM events AS e INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + INNER JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -377,106 +425,47 @@ and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2]), 0) ORDER BY aggregation_target ASC) AS funnel_actors - JOIN - (SELECT persons.id AS id, - persons.properties AS person_props - FROM - (SELECT person.id AS id, - person.properties AS properties - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons) AS persons ON equals(persons.id, funnel_actors.actor_id)) AS aggregation_target_with_props - GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2 - HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0) - LIMIT 100 - UNION ALL - SELECT 'Total_Values_In_Query' AS name, - countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, - countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.2 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id FROM - (SELECT aggregation_target AS actor_id, - timestamp AS timestamp, - steps AS steps, - final_timestamp AS final_timestamp, - first_timestamp AS first_timestamp - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - argMax(latest_0, steps) AS timestamp, - argMax(latest_1, steps) AS final_timestamp, - argMax(latest_0, steps) AS first_timestamp - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - latest_0 AS latest_0, - latest_1 AS latest_1, - latest_0 AS latest_0 - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors + (SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 2) + GROUP BY session_replay_events.session_id) AS session_replay_events + WHERE ifNull(in(session_replay_events.session_id, ['']), 0) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_event_properties_and_groups +# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.3 ''' - SELECT concat(ifNull(toString(event_name), ''), '::', ifNull(toString((prop).1), ''), '::', ifNull(toString((prop).2), '')) AS name, - countIf(actor_id, ifNull(equals(steps, 2), 0)) AS success_count, - countIf(actor_id, ifNull(notEquals(steps, 2), 1)) AS failure_count + SELECT persons.id, + persons.id AS id, + source.matching_events AS matching_events FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN (SELECT funnel_actors.actor_id AS actor_id, - funnel_actors.steps AS steps, - event.event AS event_name, - arrayJoin(JSONExtractKeysAndValues(event.properties, 'String')) AS prop - FROM events AS event - JOIN + any(funnel_actors.matching_events) AS matching_events + FROM (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, @@ -486,6 +475,9 @@ steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp @@ -494,6 +486,9 @@ steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 @@ -502,115 +497,261 @@ timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` FROM (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_1` AS aggregation_target, + e__pdi.person_id AS aggregation_target, + e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_1`) - WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), in(event.event, ['positively_related', 'negatively_related']))) - GROUP BY name - HAVING and(ifNull(greater(plus(success_count, failure_count), 2), 0), ifNull(notIn((prop).1, []), 0)) - LIMIT 100 - UNION ALL - SELECT 'Total_Values_In_Query' AS name, - countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, - countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.4 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id FROM - (SELECT aggregation_target AS actor_id, - timestamp AS timestamp, - steps AS steps, - final_timestamp AS final_timestamp, - first_timestamp AS first_timestamp + (SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 2) + GROUP BY session_replay_events.session_id) AS session_replay_events + WHERE ifNull(in(session_replay_events.session_id, ['']), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.5 + ''' + SELECT persons.id, + persons.id AS id, + source.matching_events AS matching_events + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events FROM - (SELECT aggregation_target AS aggregation_target, + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - argMax(latest_0, steps) AS timestamp, - argMax(latest_1, steps) AS final_timestamp, - argMax(latest_0, steps) AS first_timestamp + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - latest_0 AS latest_0, - latest_1 AS latest_1, - latest_0 AS latest_0 + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp FROM (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_1` AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.6 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id + FROM + (SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 2) + GROUP BY session_replay_events.session_id) AS session_replay_events + WHERE ifNull(in(session_replay_events.session_id, ['']), 0) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_event_properties_and_groups_materialized +# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.7 ''' - SELECT concat(ifNull(toString(event_name), ''), '::', ifNull(toString((prop).1), ''), '::', ifNull(toString((prop).2), '')) AS name, - countIf(actor_id, ifNull(equals(steps, 2), 0)) AS success_count, - countIf(actor_id, ifNull(notEquals(steps, 2), 1)) AS failure_count + SELECT persons.id, + persons.id AS id, + source.matching_events AS matching_events FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN (SELECT funnel_actors.actor_id AS actor_id, - funnel_actors.steps AS steps, - event.event AS event_name, - arrayJoin(JSONExtractKeysAndValues(event.properties, 'String')) AS prop - FROM events AS event - JOIN + any(funnel_actors.matching_events) AS matching_events + FROM (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, @@ -620,6 +761,9 @@ steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp @@ -628,6 +772,9 @@ steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 @@ -636,27 +783,160 @@ timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` FROM (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_1` AS aggregation_target, + e__pdi.person_id AS aggregation_target, + e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.8 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id + FROM + (SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 2) + GROUP BY session_replay_events.session_id) AS session_replay_events + WHERE ifNull(in(session_replay_events.session_id, ['']), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_event_properties_and_groups + ''' + SELECT concat(ifNull(toString(event_name), ''), '::', ifNull(toString((prop).1), ''), '::', ifNull(toString((prop).2), '')) AS name, + countIf(actor_id, ifNull(equals(steps, 2), 0)) AS success_count, + countIf(actor_id, ifNull(notEquals(steps, 2), 1)) AS failure_count + FROM + (SELECT funnel_actors.actor_id AS actor_id, + funnel_actors.steps AS steps, + event.event AS event_name, + arrayJoin(JSONExtractKeysAndValues(event.properties, 'String')) AS prop + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_1` AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -732,69 +1012,75 @@ allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_event_properties_and_groups_materialized ''' - SELECT event.event AS name, - countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, - countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count - FROM events AS event - JOIN - (SELECT aggregation_target AS actor_id, - timestamp AS timestamp, - steps AS steps, - final_timestamp AS final_timestamp, - first_timestamp AS first_timestamp - FROM - (SELECT aggregation_target AS aggregation_target, + SELECT concat(ifNull(toString(event_name), ''), '::', ifNull(toString((prop).1), ''), '::', ifNull(toString((prop).2), '')) AS name, + countIf(actor_id, ifNull(equals(steps, 2), 0)) AS success_count, + countIf(actor_id, ifNull(notEquals(steps, 2), 1)) AS failure_count + FROM + (SELECT funnel_actors.actor_id AS actor_id, + funnel_actors.steps AS steps, + event.event AS event_name, + arrayJoin(JSONExtractKeysAndValues(event.properties, 'String')) AS prop + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + timestamp AS timestamp, steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - argMax(latest_0, steps) AS timestamp, - argMax(latest_1, steps) AS final_timestamp, - argMax(latest_0, steps) AS first_timestamp + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - latest_0 AS latest_0, - latest_1 AS latest_1, - latest_0 AS latest_0 + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp FROM (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) - WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), notIn(event.event, [])) + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_1` AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_1`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), in(event.event, ['positively_related', 'negatively_related']))) GROUP BY name + HAVING and(ifNull(greater(plus(success_count, failure_count), 2), 0), ifNull(notIn((prop).1, []), 0)) LIMIT 100 UNION ALL SELECT 'Total_Values_In_Query' AS name, @@ -841,7 +1127,7 @@ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 FROM (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, + e.`$group_1` AS aggregation_target, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(equals(e.event, 'paid'), 1, 0) AS step_1, @@ -860,7 +1146,7 @@ allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.1 +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups ''' SELECT event.event AS name, countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, @@ -913,15 +1199,7 @@ if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 FROM events AS e - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -983,15 +1261,7 @@ if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 FROM events AS e - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -1004,75 +1274,457 @@ allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2 +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.1 ''' - SELECT event.event AS name, - countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, - countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count - FROM events AS event - JOIN - (SELECT aggregation_target AS actor_id, - timestamp AS timestamp, - steps AS steps, - final_timestamp AS final_timestamp, - first_timestamp AS first_timestamp - FROM - (SELECT aggregation_target AS aggregation_target, + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - argMax(latest_0, steps) AS timestamp, - argMax(latest_1, steps) AS final_timestamp, - argMax(latest_0, steps) AS first_timestamp + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - latest_0 AS latest_0, - latest_1 AS latest_1, - latest_0 AS latest_0 + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp FROM (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) - WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), notIn(event.event, [])) - GROUP BY name - LIMIT 100 - UNION ALL - SELECT 'Total_Values_In_Query' AS name, + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) + GROUP BY actor_id + ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.2 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) + GROUP BY actor_id + ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.3 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) + GROUP BY actor_id + ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.4 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) + GROUP BY actor_id + ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.5 + ''' + SELECT event.event AS name, countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count - FROM + FROM events AS event + JOIN (SELECT aggregation_target AS actor_id, timestamp AS timestamp, steps AS steps, @@ -1119,26 +1771,30 @@ if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.1 - ''' - SELECT event.event AS name, + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), notIn(event.event, [])) + GROUP BY name + LIMIT 100 + UNION ALL + SELECT 'Total_Values_In_Query' AS name, countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count - FROM events AS event - JOIN + FROM (SELECT aggregation_target AS actor_id, timestamp AS timestamp, steps AS steps, @@ -1200,9 +1856,2623 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) - WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), notIn(event.event, [])) - GROUP BY name + ORDER BY aggregation_target ASC) AS funnel_actors + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.6 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) + GROUP BY actor_id + ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.7 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) + GROUP BY actor_id + ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2 + ''' + SELECT event.event AS name, + countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, + countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), notIn(event.event, [])) + GROUP BY name + LIMIT 100 + UNION ALL + SELECT 'Total_Values_In_Query' AS name, + countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, + countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count + FROM + (SELECT aggregation_target AS actor_id, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.1 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) + GROUP BY actor_id + ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.2 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) + GROUP BY actor_id + ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.3 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) + GROUP BY actor_id + ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.4 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) + GROUP BY actor_id + ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.5 + ''' + SELECT event.event AS name, + countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, + countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), notIn(event.event, [])) + GROUP BY name + LIMIT 100 + UNION ALL + SELECT 'Total_Values_In_Query' AS name, + countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, + countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count + FROM + (SELECT aggregation_target AS actor_id, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.6 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) + GROUP BY actor_id + ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.7 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM events AS event + JOIN + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) + GROUP BY actor_id + ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups + ''' + SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, + countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, + countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count + FROM + (SELECT funnel_actors.actor_id AS actor_id, + funnel_actors.steps AS steps, + arrayJoin(arrayZip(['industry'], [JSONExtractString(groups_0.properties, 'industry')])) AS prop + FROM + (SELECT aggregation_target AS actor_id, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + LEFT JOIN + (SELECT groups.key AS key, + groups.properties AS properties + FROM + (SELECT argMax(groups.group_properties, groups._timestamp) AS properties, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props + GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2 + HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0) + LIMIT 100 + UNION ALL + SELECT 'Total_Values_In_Query' AS name, + countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, + countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count + FROM + (SELECT aggregation_target AS actor_id, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.1 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.2 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.3 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.4 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.5 + ''' + SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, + countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, + countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count + FROM + (SELECT funnel_actors.actor_id AS actor_id, + funnel_actors.steps AS steps, + arrayJoin(JSONExtractKeysAndValues(groups_0.properties, 'String')) AS prop + FROM + (SELECT aggregation_target AS actor_id, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + LEFT JOIN + (SELECT groups.key AS key, + groups.properties AS properties + FROM + (SELECT argMax(groups.group_properties, groups._timestamp) AS properties, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props + GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2 + HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0) + LIMIT 100 + UNION ALL + SELECT 'Total_Values_In_Query' AS name, + countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, + countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count + FROM + (SELECT aggregation_target AS actor_id, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized + ''' + SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, + countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, + countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count + FROM + (SELECT funnel_actors.actor_id AS actor_id, + funnel_actors.steps AS steps, + arrayJoin(arrayZip(['industry'], [JSONExtractString(groups_0.properties, 'industry')])) AS prop + FROM + (SELECT aggregation_target AS actor_id, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + LEFT JOIN + (SELECT groups.key AS key, + groups.properties AS properties + FROM + (SELECT argMax(groups.group_properties, groups._timestamp) AS properties, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props + GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2 + HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0) + LIMIT 100 + UNION ALL + SELECT 'Total_Values_In_Query' AS name, + countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, + countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count + FROM + (SELECT aggregation_target AS actor_id, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.1 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.2 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.3 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.4 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.5 + ''' + SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, + countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, + countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count + FROM + (SELECT funnel_actors.actor_id AS actor_id, + funnel_actors.steps AS steps, + arrayJoin(JSONExtractKeysAndValues(groups_0.properties, 'String')) AS prop + FROM + (SELECT aggregation_target AS actor_id, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + LEFT JOIN + (SELECT groups.key AS key, + groups.properties AS properties + FROM + (SELECT argMax(groups.group_properties, groups._timestamp) AS properties, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props + GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2 + HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0) LIMIT 100 UNION ALL SELECT 'Total_Values_In_Query' AS name, @@ -1255,15 +4525,7 @@ if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 FROM events AS e - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -1276,7 +4538,7 @@ allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events ''' SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, @@ -1399,27 +4661,499 @@ min(latest_1) OVER (PARTITION BY aggregation_target ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.1 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.2 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.3 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.4 + ''' + SELECT source.actor_id AS actor_id + FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.1 +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.5 ''' SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, @@ -1562,7 +5296,7 @@ allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized ''' SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, @@ -1705,17 +5439,22 @@ allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.1 +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.1 ''' - SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, - countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, - countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count + SELECT source.actor_id AS actor_id FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN (SELECT funnel_actors.actor_id AS actor_id, - funnel_actors.steps AS steps, - arrayJoin(JSONExtractKeysAndValues(groups_0.properties, 'String')) AS prop + any(funnel_actors.matching_events) AS matching_events FROM (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, @@ -1725,6 +5464,9 @@ steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp @@ -1733,6 +5475,9 @@ steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 @@ -1741,27 +5486,60 @@ timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` FROM (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e.`$group_0` AS aggregation_target, + e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -1769,96 +5547,150 @@ and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2]), 0) ORDER BY aggregation_target ASC) AS funnel_actors - LEFT JOIN - (SELECT groups.key AS key, - groups.properties AS properties - FROM - (SELECT argMax(groups.group_properties, groups._timestamp) AS properties, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE equals(groups.team_id, 2) - GROUP BY groups.group_type_index, - groups.group_key) AS groups - WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props - GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2 - HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0) - LIMIT 100 - UNION ALL - SELECT 'Total_Values_In_Query' AS name, - countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, - countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.2 + ''' + SELECT source.actor_id AS actor_id FROM - (SELECT aggregation_target AS actor_id, - timestamp AS timestamp, - steps AS steps, - final_timestamp AS final_timestamp, - first_timestamp AS first_timestamp + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events FROM - (SELECT aggregation_target AS aggregation_target, + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - argMax(latest_0, steps) AS timestamp, - argMax(latest_1, steps) AS final_timestamp, - argMax(latest_0, steps) AS first_timestamp + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - latest_0 AS latest_0, - latest_1 AS latest_1, - latest_0 AS latest_0 + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp FROM (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.3 ''' - SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, - countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, - countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count + SELECT source.actor_id AS actor_id FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN (SELECT funnel_actors.actor_id AS actor_id, - funnel_actors.steps AS steps, - arrayJoin(arrayZip(['industry'], [JSONExtractString(groups_0.properties, 'industry')])) AS prop + any(funnel_actors.matching_events) AS matching_events FROM (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, @@ -1868,6 +5700,9 @@ steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp @@ -1876,6 +5711,9 @@ steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 @@ -1884,27 +5722,60 @@ timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` FROM (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e.`$group_0` AS aggregation_target, + e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -1912,86 +5783,135 @@ and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2]), 0) ORDER BY aggregation_target ASC) AS funnel_actors - LEFT JOIN - (SELECT groups.key AS key, - groups.properties AS properties - FROM - (SELECT argMax(groups.group_properties, groups._timestamp) AS properties, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE equals(groups.team_id, 2) - GROUP BY groups.group_type_index, - groups.group_key) AS groups - WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props - GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2 - HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0) - LIMIT 100 - UNION ALL - SELECT 'Total_Values_In_Query' AS name, - countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, - countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.4 + ''' + SELECT source.actor_id AS actor_id FROM - (SELECT aggregation_target AS actor_id, - timestamp AS timestamp, - steps AS steps, - final_timestamp AS final_timestamp, - first_timestamp AS first_timestamp + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events FROM - (SELECT aggregation_target AS aggregation_target, + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - argMax(latest_0, steps) AS timestamp, - argMax(latest_1, steps) AS final_timestamp, - argMax(latest_0, steps) AS first_timestamp + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - latest_0 AS latest_0, - latest_1 AS latest_1, - latest_0 AS latest_0 + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp FROM (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.1 +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.5 ''' SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, @@ -2134,7 +6054,7 @@ allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2 ''' SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, @@ -2277,17 +6197,22 @@ allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.1 +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.1 ''' - SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, - countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, - countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count + SELECT source.actor_id AS actor_id FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN (SELECT funnel_actors.actor_id AS actor_id, - funnel_actors.steps AS steps, - arrayJoin(JSONExtractKeysAndValues(groups_0.properties, 'String')) AS prop + any(funnel_actors.matching_events) AS matching_events FROM (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, @@ -2297,6 +6222,9 @@ steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp @@ -2305,6 +6233,9 @@ steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 @@ -2313,27 +6244,60 @@ timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` FROM (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e.`$group_0` AS aggregation_target, + e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -2341,96 +6305,150 @@ and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2]), 0) ORDER BY aggregation_target ASC) AS funnel_actors - LEFT JOIN - (SELECT groups.key AS key, - groups.properties AS properties - FROM - (SELECT argMax(groups.group_properties, groups._timestamp) AS properties, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE equals(groups.team_id, 2) - GROUP BY groups.group_type_index, - groups.group_key) AS groups - WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props - GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2 - HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0) - LIMIT 100 - UNION ALL - SELECT 'Total_Values_In_Query' AS name, - countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, - countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.2 + ''' + SELECT source.actor_id AS actor_id FROM - (SELECT aggregation_target AS actor_id, - timestamp AS timestamp, - steps AS steps, - final_timestamp AS final_timestamp, - first_timestamp AS first_timestamp + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events FROM - (SELECT aggregation_target AS aggregation_target, + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - argMax(latest_0, steps) AS timestamp, - argMax(latest_1, steps) AS final_timestamp, - argMax(latest_0, steps) AS first_timestamp + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - latest_0 AS latest_0, - latest_1 AS latest_1, - latest_0 AS latest_0 + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp FROM (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2 +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.3 ''' - SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, - countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, - countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count + SELECT source.actor_id AS actor_id FROM + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN (SELECT funnel_actors.actor_id AS actor_id, - funnel_actors.steps AS steps, - arrayJoin(arrayZip(['industry'], [JSONExtractString(groups_0.properties, 'industry')])) AS prop + any(funnel_actors.matching_events) AS matching_events FROM (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, @@ -2440,6 +6458,9 @@ steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp @@ -2448,6 +6469,9 @@ steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 @@ -2456,27 +6480,60 @@ timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` FROM (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, e.`$group_0` AS aggregation_target, + e.uuid AS uuid, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -2484,86 +6541,135 @@ and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2]), 0) ORDER BY aggregation_target ASC) AS funnel_actors - LEFT JOIN - (SELECT groups.key AS key, - groups.properties AS properties - FROM - (SELECT argMax(groups.group_properties, groups._timestamp) AS properties, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE equals(groups.team_id, 2) - GROUP BY groups.group_type_index, - groups.group_key) AS groups - WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props - GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2 - HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0) - LIMIT 100 - UNION ALL - SELECT 'Total_Values_In_Query' AS name, - countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count, - countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.4 + ''' + SELECT source.actor_id AS actor_id FROM - (SELECT aggregation_target AS actor_id, - timestamp AS timestamp, - steps AS steps, - final_timestamp AS final_timestamp, - first_timestamp AS first_timestamp + (SELECT groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events FROM - (SELECT aggregation_target AS aggregation_target, + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - argMax(latest_0, steps) AS timestamp, - argMax(latest_1, steps) AS final_timestamp, - argMax(latest_0, steps) AS first_timestamp + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - latest_0 AS latest_0, - latest_1 AS latest_1, - latest_0 AS latest_0 + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp FROM (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id) + ORDER BY source.actor_id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 ''' # --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.1 +# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.5 ''' SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name, countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count, diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr new file mode 100644 index 0000000000000..fe7404ea9b7a1 --- /dev/null +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr @@ -0,0 +1,776 @@ +# serializer version: 1 +# name: TestFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings + ''' + SELECT persons.id, + persons.id AS id, + source.matching_events AS matching_events + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM events AS event + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS event__pdi ON equals(event.distinct_id, event__pdi.distinct_id) + JOIN + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, '$pageview'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'insight analyzed'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed']), equals(event.event, 'insight loaded'), ifNull(equals(funnel_actors.steps, 2), 0)) + GROUP BY actor_id + ORDER BY actor_id ASC) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings.1 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id + FROM + (SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 2) + GROUP BY session_replay_events.session_id) AS session_replay_events + WHERE ifNull(in(session_replay_events.session_id, ['s2']), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings.2 + ''' + SELECT persons.id, + persons.id AS id, + source.matching_events AS matching_events + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM events AS event + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS event__pdi ON equals(event.distinct_id, event__pdi.distinct_id) + JOIN + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + median(step_2_conversion_time) AS step_2_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(step_2_matching_event) AS step_2_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_2, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event, + latest_0 AS latest_0, + latest_2 AS latest_2, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + step_2 AS step_2, + latest_2 AS latest_2, + uuid_2 AS uuid_2, + `$session_id_2` AS `$session_id_2`, + `$window_id_2` AS `$window_id_2`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, + last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, + last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`, + if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2` + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + last_value(uuid_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, + last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, + last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, '$pageview'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, + if(equals(e.event, 'insight updated'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, + if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, + if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'insight analyzed', 'insight updated'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2, 3]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id) + WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed', 'insight updated']), equals(event.event, 'insight loaded'), ifNull(notEquals(funnel_actors.steps, 3), 1)) + GROUP BY actor_id + ORDER BY actor_id ASC) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings.3 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id + FROM + (SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 2) + GROUP BY session_replay_events.session_id) AS session_replay_events + WHERE ifNull(in(session_replay_events.session_id, ['s2']), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelCorrelationsActors.test_funnel_correlation_on_properties_with_recordings + ''' + SELECT persons.id, + persons.id AS id, + source.matching_events AS matching_events + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + last_value(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, + last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, + last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, '$pageview'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'insight analyzed')), ifNull(equals(e__pdi__person.properties___foo, 'bar'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelCorrelationsActors.test_funnel_correlation_on_properties_with_recordings.1 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id + FROM + (SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 2) + GROUP BY session_replay_events.session_id) AS session_replay_events + WHERE ifNull(in(session_replay_events.session_id, ['s2']), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings + ''' + SELECT persons.id, + persons.id AS id, + source.matching_events AS matching_events + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, + min(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1, + min(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`, + min(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, '$pageview'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__pdi__person.properties___foo, 'bar'), 0)))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(equals(funnel_actors.steps, 2), 0) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings.1 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id + FROM + (SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 2) + GROUP BY session_replay_events.session_id) AS session_replay_events + WHERE ifNull(in(session_replay_events.session_id, ['s2']), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings.2 + ''' + SELECT persons.id, + persons.id AS id, + source.matching_events AS matching_events + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT funnel_actors.actor_id AS actor_id, + any(funnel_actors.matching_events) AS matching_events + FROM + (SELECT aggregation_target AS actor_id, + final_matching_events AS matching_events, + timestamp AS timestamp, + steps AS steps, + final_timestamp AS final_timestamp, + first_timestamp AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + groupArray(10)(step_0_matching_event) AS step_0_matching_events, + groupArray(10)(step_1_matching_event) AS step_1_matching_events, + groupArray(10)(final_matching_event) AS final_matching_events, + argMax(latest_0, steps) AS timestamp, + argMax(latest_1, steps) AS final_timestamp, + argMax(latest_0, steps) AS first_timestamp + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, + latest_0 AS latest_0, + latest_1 AS latest_1, + latest_0 AS latest_0 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + latest_1 AS latest_1, + uuid_1 AS uuid_1, + `$session_id_1` AS `$session_id_1`, + `$window_id_1` AS `$window_id_1`, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, + tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, + if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + uuid_0 AS uuid_0, + `$session_id_0` AS `$session_id_0`, + `$window_id_0` AS `$window_id_0`, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, + min(uuid_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1, + min(`$session_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`, + min(`$window_id_1`) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1` + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + e.uuid AS uuid, + if(equals(e.event, '$pageview'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, + if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, + if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, + if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, + if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, + if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__pdi__person.properties___foo, 'bar'), 0)))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + WHERE ifNull(in(steps, [1, 2]), 0) + ORDER BY aggregation_target ASC) AS funnel_actors + WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) + GROUP BY funnel_actors.actor_id + ORDER BY funnel_actors.actor_id ASC) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings.3 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id + FROM + (SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 2) + GROUP BY session_replay_events.session_id) AS session_replay_events + WHERE ifNull(in(session_replay_events.session_id, ['s3']), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_correlation.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_correlation.py index 176606ede5b4e..f69eb3c6977b6 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel_correlation.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_correlation.py @@ -1,30 +1,32 @@ -from typing import cast +from typing import Any, Dict, cast import unittest from rest_framework.exceptions import ValidationError -from ee.clickhouse.queries.funnels.funnel_correlation_persons import ( - FunnelCorrelationActors, -) from posthog.constants import INSIGHT_FUNNELS from posthog.hogql_queries.insights.funnels.funnel_correlation_query_runner import ( EventContingencyTable, EventStats, FunnelCorrelationQueryRunner, ) +from posthog.hogql_queries.insights.funnels.test.test_funnel_correlations_persons import get_actors from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query from posthog.models.action import Action from posthog.models.action_step import ActionStep from posthog.models.element import Element -from posthog.models.filters import Filter from posthog.models.group.util import create_group from posthog.models.group_type_mapping import GroupTypeMapping from posthog.models.instance_setting import override_instance_config from posthog.schema import ( + EventPropertyFilter, + EventsNode, FunnelCorrelationQuery, FunnelsActorsQuery, FunnelsQuery, FunnelCorrelationResultsType, + GroupPropertyFilter, + PersonPropertyFilter, + PropertyOperator, ) from posthog.test.base import ( APIBaseTest, @@ -75,38 +77,38 @@ def _get_events_for_filters( result, skewed_totals, _, _ = FunnelCorrelationQueryRunner(query=correlation_query, team=self.team)._calculate() return result, skewed_totals - def _get_actors_for_event(self, filter: Filter, event_name: str, properties=None, success=True): - actor_filter = filter.shallow_clone( - { - "funnel_correlation_person_entity": { - "id": event_name, - "type": "events", - "properties": properties, - }, - "funnel_correlation_person_converted": "TrUe" if success else "falSE", - } + def _get_actors_for_event(self, filters: Dict[str, Any], event_name: str, properties=None, success=True): + serialized_actors = get_actors( + filters, + self.team, + funnelCorrelationPersonConverted=success, + funnelCorrelationPersonEntity=EventsNode(event=event_name, properties=properties), ) + return [str(row[0]) for row in serialized_actors] - _, serialized_actors, _ = FunnelCorrelationActors(actor_filter, self.team).get_actors() - return [str(row["id"]) for row in serialized_actors] + def _get_actors_for_property( + self, filters: Dict[str, Any], property_values: list, success=True, funnelCorrelationNames=None + ): + funnelCorrelationPropertyValues = [ + ( + PersonPropertyFilter(key=prop, value=value, operator=PropertyOperator.exact) + if type == "person" + else GroupPropertyFilter( + key=prop, value=value, group_type_index=group_type_index, operator=PropertyOperator.exact + ) + ) + for prop, value, type, group_type_index in property_values + ] - def _get_actors_for_property(self, filter: Filter, property_values: list, success=True): - actor_filter = filter.shallow_clone( - { - "funnel_correlation_property_values": [ - { - "key": prop, - "value": value, - "type": type, - "group_type_index": group_type_index, - } - for prop, value, type, group_type_index in property_values - ], - "funnel_correlation_person_converted": "TrUe" if success else "falSE", - } + serialized_actors = get_actors( + filters, + self.team, + funnelCorrelationType=FunnelCorrelationResultsType.properties, + funnelCorrelationNames=funnelCorrelationNames, + funnelCorrelationPersonConverted=success, + funnelCorrelationPropertyValues=funnelCorrelationPropertyValues, ) - _, serialized_actors, _ = FunnelCorrelationActors(actor_filter, self.team).get_actors() - return [str(row["id"]) for row in serialized_actors] + return [str(row[0]) for row in serialized_actors] def test_basic_funnel_correlation_with_events(self): filters = { @@ -185,16 +187,16 @@ def test_basic_funnel_correlation_with_events(self): ], ) - # self.assertEqual(len(self._get_actors_for_event(filters, "positively_related")), 5) - # self.assertEqual( - # len(self._get_actors_for_event(filters, "positively_related", success=False)), - # 0, - # ) - # self.assertEqual( - # len(self._get_actors_for_event(filters, "negatively_related", success=False)), - # 5, - # ) - # self.assertEqual(len(self._get_actors_for_event(filters, "negatively_related")), 0) + self.assertEqual(len(self._get_actors_for_event(filters, "positively_related")), 5) + self.assertEqual( + len(self._get_actors_for_event(filters, "positively_related", success=False)), + 0, + ) + self.assertEqual( + len(self._get_actors_for_event(filters, "negatively_related", success=False)), + 5, + ) + self.assertEqual(len(self._get_actors_for_event(filters, "negatively_related")), 0) # Now exclude positively_related result, _ = self._get_events_for_filters( @@ -221,16 +223,16 @@ def test_basic_funnel_correlation_with_events(self): ], ) # Getting specific people isn't affected by exclude_events - # self.assertEqual(len(self._get_actors_for_event(filter, "positively_related")), 5) - # self.assertEqual( - # len(self._get_actors_for_event(filter, "positively_related", success=False)), - # 0, - # ) - # self.assertEqual( - # len(self._get_actors_for_event(filter, "negatively_related", success=False)), - # 5, - # ) - # self.assertEqual(len(self._get_actors_for_event(filter, "negatively_related")), 0) + self.assertEqual(len(self._get_actors_for_event(filters, "positively_related")), 5) + self.assertEqual( + len(self._get_actors_for_event(filters, "positively_related", success=False)), + 0, + ) + self.assertEqual( + len(self._get_actors_for_event(filters, "negatively_related", success=False)), + 5, + ) + self.assertEqual(len(self._get_actors_for_event(filters, "negatively_related")), 0) @snapshot_clickhouse_queries def test_action_events_are_excluded_from_correlations(self): @@ -444,16 +446,16 @@ def test_funnel_correlation_with_events_and_groups(self): ], ) - # self.assertEqual(len(self._get_actors_for_event(filter, "positively_related")), 5) - # self.assertEqual( - # len(self._get_actors_for_event(filter, "positively_related", success=False)), - # 0, - # ) - # self.assertEqual(len(self._get_actors_for_event(filter, "negatively_related")), 1) - # self.assertEqual( - # len(self._get_actors_for_event(filter, "negatively_related", success=False)), - # 1, - # ) + self.assertEqual(len(self._get_actors_for_event(filters, "positively_related")), 5) + self.assertEqual( + len(self._get_actors_for_event(filters, "positively_related", success=False)), + 0, + ) + self.assertEqual(len(self._get_actors_for_event(filters, "negatively_related")), 1) + self.assertEqual( + len(self._get_actors_for_event(filters, "negatively_related", success=False)), + 1, + ) # Now exclude all groups in positive excludes = { @@ -469,30 +471,30 @@ def test_funnel_correlation_with_events_and_groups(self): result, _ = self._get_events_for_filters({**filters, **excludes}) # TODO destructure - # odds_ratio = result[0].pop("odds_ratio") - # expected_odds_ratio = 1 - # # success total and failure totals remove other groups too + odds_ratio = result[0].pop("odds_ratio") + expected_odds_ratio = 1 + # success total and failure totals remove other groups too - # self.assertAlmostEqual(odds_ratio, expected_odds_ratio) + self.assertAlmostEqual(odds_ratio, expected_odds_ratio) - # self.assertEqual( - # result, - # [ - # { - # "event": "negatively_related", - # "success_count": 1, - # "failure_count": 1, - # # "odds_ratio": 1, - # "correlation_type": "failure", - # } - # ], - # ) + self.assertEqual( + result, + [ + { + "event": "negatively_related", + "success_count": 1, + "failure_count": 1, + # "odds_ratio": 1, + "correlation_type": "failure", + } + ], + ) - # self.assertEqual(len(self._get_actors_for_event(filter, "negatively_related")), 1) - # self.assertEqual( - # len(self._get_actors_for_event(filter, "negatively_related", success=False)), - # 1, - # ) + self.assertEqual(len(self._get_actors_for_event(filters, "negatively_related")), 1) + self.assertEqual( + len(self._get_actors_for_event(filters, "negatively_related", success=False)), + 1, + ) # :FIXME: This should also work with materialized columns # @also_test_with_materialized_columns(event_properties=[], person_properties=["$browser"]) @@ -626,22 +628,38 @@ def test_basic_funnel_correlation_with_properties(self): ], ) - # self.assertEqual( - # len(self._get_actors_for_property(filter, [("$browser", "Positive", "person", None)])), - # 10, - # ) - # self.assertEqual( - # len(self._get_actors_for_property(filter, [("$browser", "Positive", "person", None)], False)), - # 1, - # ) - # self.assertEqual( - # len(self._get_actors_for_property(filter, [("$browser", "Negative", "person", None)])), - # 1, - # ) - # self.assertEqual( - # len(self._get_actors_for_property(filter, [("$browser", "Negative", "person", None)], False)), - # 10, - # ) + self.assertEqual( + len( + self._get_actors_for_property( + filters, [("$browser", "Positive", "person", None)], funnelCorrelationNames=["$browser"] + ) + ), + 10, + ) + self.assertEqual( + len( + self._get_actors_for_property( + filters, [("$browser", "Positive", "person", None)], False, funnelCorrelationNames=["$browser"] + ) + ), + 1, + ) + self.assertEqual( + len( + self._get_actors_for_property( + filters, [("$browser", "Negative", "person", None)], funnelCorrelationNames=["$browser"] + ) + ), + 1, + ) + self.assertEqual( + len( + self._get_actors_for_property( + filters, [("$browser", "Negative", "person", None)], False, funnelCorrelationNames=["$browser"] + ) + ), + 10, + ) # TODO: Delete this test when moved to person-on-events @also_test_with_materialized_columns( @@ -809,22 +827,26 @@ def test_funnel_correlation_with_properties_and_groups(self): ], ) - # self.assertEqual( - # len(self._get_actors_for_property(filter, [("industry", "positive", "group", 0)])), - # 10, - # ) - # self.assertEqual( - # len(self._get_actors_for_property(filter, [("industry", "positive", "group", 0)], False)), - # 1, - # ) - # self.assertEqual( - # len(self._get_actors_for_property(filter, [("industry", "negative", "group", 0)])), - # 1, - # ) - # self.assertEqual( - # len(self._get_actors_for_property(filter, [("industry", "negative", "group", 0)], False)), - # 10, - # ) + self.assertEqual( + len( + self._get_actors_for_property( + filters, [("industry", "positive", "group", 0)], funnelCorrelationNames=["industry"] + ) + ), + 10, + ) + self.assertEqual( + len(self._get_actors_for_property(filters, [("industry", "positive", "group", 0)], False)), + 1, + ) + self.assertEqual( + len(self._get_actors_for_property(filters, [("industry", "negative", "group", 0)])), + 1, + ) + self.assertEqual( + len(self._get_actors_for_property(filters, [("industry", "negative", "group", 0)], False)), + 10, + ) # test with `$all` as property # _run property correlation with filter on all properties @@ -1011,22 +1033,22 @@ def test_funnel_correlation_with_properties_and_groups_person_on_events(self): ], ) - # self.assertEqual( - # len(self._get_actors_for_property(filter, [("industry", "positive", "group", 0)])), - # 10, - # ) - # self.assertEqual( - # len(self._get_actors_for_property(filter, [("industry", "positive", "group", 0)], False)), - # 1, - # ) - # self.assertEqual( - # len(self._get_actors_for_property(filter, [("industry", "negative", "group", 0)])), - # 1, - # ) - # self.assertEqual( - # len(self._get_actors_for_property(filter, [("industry", "negative", "group", 0)], False)), - # 10, - # ) + self.assertEqual( + len(self._get_actors_for_property(filters, [("industry", "positive", "group", 0)])), + 10, + ) + self.assertEqual( + len(self._get_actors_for_property(filters, [("industry", "positive", "group", 0)], False)), + 1, + ) + self.assertEqual( + len(self._get_actors_for_property(filters, [("industry", "negative", "group", 0)])), + 1, + ) + self.assertEqual( + len(self._get_actors_for_property(filters, [("industry", "negative", "group", 0)], False)), + 10, + ) # test with `$all` as property # _run property correlation with filter on all properties @@ -1195,7 +1217,7 @@ def test_correlation_with_multiple_properties(self): "date_to": "2020-01-14", } - #  5 successful people with both properties + # 5 successful people with both properties for i in range(5): _create_person( distinct_ids=[f"user_{i}"], @@ -1215,7 +1237,7 @@ def test_correlation_with_multiple_properties(self): timestamp="2020-01-04T14:00:00Z", ) - #  10 successful people with some different properties + # 10 successful people with some different properties for i in range(5, 15): _create_person( distinct_ids=[f"user_{i}"], @@ -1388,18 +1410,30 @@ def test_correlation_with_multiple_properties(self): self.assertEqual(new_result, new_expected_result) + self.assertEqual( + len( + self._get_actors_for_property( + filters, [("$nice", "not", "person", None)], funnelCorrelationNames=["$browser", "$nice"] + ) + ), + 10, + ) # self.assertEqual( - # len(self._get_actors_for_property(filter, [("$nice", "not", "person", None)])), - # 10, - # ) - # self.assertEqual( - # len(self._get_actors_for_property(filter, [("$nice", "", "person", None)], False)), + # len( + # self._get_actors_for_property( + # filters, [("$nice", "", "person", None)], False, funnelCorrelationNames=["$browser", "$nice"] + # ) + # ), # 1, # ) - # self.assertEqual( - # len(self._get_actors_for_property(filter, [("$nice", "very", "person", None)])), - # 5, - # ) + self.assertEqual( + len( + self._get_actors_for_property( + filters, [("$nice", "very", "person", None)], funnelCorrelationNames=["$browser", "$nice"] + ) + ), + 5, + ) def test_discarding_insignificant_events(self): filters = { @@ -1642,22 +1676,48 @@ def test_funnel_correlation_with_event_properties(self): ], ) - # self.assertEqual( - # len(self._get_actors_for_event(filter, "positively_related", {"blah": "value_bleh"})), - # 5, - # ) - # self.assertEqual( - # len(self._get_actors_for_event(filter, "positively_related", {"signup_source": "facebook"})), - # 3, - # ) - # self.assertEqual( - # len(self._get_actors_for_event(filter, "positively_related", {"signup_source": "facebook"}, False)), - # 0, - # ) - # self.assertEqual( - # len(self._get_actors_for_event(filter, "negatively_related", {"signup_source": "email"}, False)), - # 3, - # ) + self.assertEqual( + len( + self._get_actors_for_event( + filters, + "positively_related", + [EventPropertyFilter(operator=PropertyOperator.exact, key="blah", value="value_bleh")], + ) + ), + 5, + ) + self.assertEqual( + len( + self._get_actors_for_event( + filters, + "positively_related", + [EventPropertyFilter(operator=PropertyOperator.exact, key="signup_source", value="facebook")], + ) + ), + 3, + ) + self.assertEqual( + len( + self._get_actors_for_event( + filters, + "positively_related", + [EventPropertyFilter(operator=PropertyOperator.exact, key="signup_source", value="facebook")], + False, + ) + ), + 0, + ) + self.assertEqual( + len( + self._get_actors_for_event( + filters, + "negatively_related", + [EventPropertyFilter(operator=PropertyOperator.exact, key="signup_source", value="email")], + False, + ) + ), + 3, + ) @also_test_with_materialized_columns(["blah", "signup_source"], verify_no_jsonextract=False) @snapshot_clickhouse_queries @@ -1846,16 +1906,28 @@ def test_funnel_correlation_with_event_properties_exclusions(self): ], ) - # self.assertEqual( - # len(self._get_actors_for_event(filter, "positively_related", {"blah": "value_bleh"})), - # 3, - # ) + self.assertEqual( + len( + self._get_actors_for_event( + filters, + "positively_related", + [EventPropertyFilter(operator=PropertyOperator.exact, key="blah", value="value_bleh")], + ) + ), + 3, + ) - # # If you search for persons with a specific property, even if excluded earlier, you should get them - # self.assertEqual( - # len(self._get_actors_for_event(filter, "positively_related", {"signup_source": "facebook"})), - # 3, - # ) + # If you search for persons with a specific property, even if excluded earlier, you should get them + self.assertEqual( + len( + self._get_actors_for_event( + filters, + "positively_related", + [EventPropertyFilter(operator=PropertyOperator.exact, key="signup_source", value="facebook")], + ) + ), + 3, + ) # :FIXME: This should also work with materialized columns # @also_test_with_materialized_columns(["$event_type", "signup_source"]) diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_correlations_persons.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_correlations_persons.py new file mode 100644 index 0000000000000..f324dcfcf7c3a --- /dev/null +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_correlations_persons.py @@ -0,0 +1,647 @@ +from typing import Any, Dict, Optional, cast +from datetime import datetime, timedelta +from uuid import UUID + +from django.utils import timezone +from freezegun import freeze_time + +from posthog.constants import INSIGHT_FUNNELS +from posthog.hogql_queries.actors_query_runner import ActorsQueryRunner +from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query +from posthog.models.team.team import Team +from posthog.schema import ( + ActorsQuery, + EventsNode, + FunnelCorrelationActorsQuery, + FunnelCorrelationQuery, + FunnelCorrelationResultsType, + FunnelsActorsQuery, + FunnelsQuery, +) +from posthog.session_recordings.queries.test.session_replay_sql import ( + produce_replay_summary, +) +from posthog.test.base import ( + APIBaseTest, + ClickhouseTestMixin, + _create_event, + _create_person, + snapshot_clickhouse_queries, +) +from posthog.test.test_journeys import journeys_for + +FORMAT_TIME = "%Y-%m-%d 00:00:00" +MAX_STEP_COLUMN = 0 +COUNT_COLUMN = 1 +PERSON_ID_COLUMN = 2 + + +def get_actors( + filters: Dict[str, Any], + team: Team, + funnelCorrelationType: Optional[FunnelCorrelationResultsType] = FunnelCorrelationResultsType.events, + funnelCorrelationNames=None, + funnelCorrelationPersonConverted: Optional[bool] = None, + funnelCorrelationPersonEntity: Optional[EventsNode] = None, + funnelCorrelationPropertyValues=None, + includeRecordings: Optional[bool] = True, +): + funnels_query = cast(FunnelsQuery, filter_to_query(filters)) + funnel_actors_query = FunnelsActorsQuery(source=funnels_query, includeRecordings=includeRecordings) + correlation_query = FunnelCorrelationQuery( + source=funnel_actors_query, + funnelCorrelationType=(funnelCorrelationType or FunnelCorrelationResultsType.events), + funnelCorrelationNames=funnelCorrelationNames, + # funnelCorrelationExcludeNames=funnelCorrelationExcludeNames, + # funnelCorrelationExcludeEventNames=funnelCorrelationExcludeEventNames, + # funnelCorrelationEventNames=funnelCorrelationEventNames, + # funnelCorrelationEventExcludePropertyNames=funnelCorrelationEventExcludePropertyNames, + ) + correlation_actors_query = FunnelCorrelationActorsQuery( + source=correlation_query, + funnelCorrelationPersonConverted=funnelCorrelationPersonConverted, + funnelCorrelationPersonEntity=funnelCorrelationPersonEntity, + funnelCorrelationPropertyValues=funnelCorrelationPropertyValues, + ) + persons_select = ["id", "person", *(["matched_recordings"] if includeRecordings else [])] + groups_select = ["actor_id"] + actors_query = ActorsQuery( + source=correlation_actors_query, + select=persons_select if funnels_query.aggregation_group_type_index is None else groups_select, + ) + response = ActorsQueryRunner(query=actors_query, team=team).calculate() + return response.results + + +class TestFunnelCorrelationsActors(ClickhouseTestMixin, APIBaseTest): + maxDiff = None + + def _setup_basic_test(self): + filters = { + "events": [ + {"id": "user signed up", "type": "events", "order": 0}, + {"id": "paid", "type": "events", "order": 1}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-14", + } + + success_target_persons = [] + failure_target_persons = [] + events_by_person = {} + for i in range(10): + person_id = f"user_{i}" + person = _create_person(distinct_ids=[person_id], team_id=self.team.pk) + events_by_person[person_id] = [{"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)}] + + if i % 2 == 0: + events_by_person[person_id].append( + { + "event": "positively_related", + "timestamp": datetime(2020, 1, 3, 14), + } + ) + + success_target_persons.append(str(person.uuid)) + + events_by_person[person_id].append({"event": "paid", "timestamp": datetime(2020, 1, 4, 14)}) + + for i in range(10, 20): + person_id = f"user_{i}" + person = _create_person(distinct_ids=[person_id], team_id=self.team.pk) + events_by_person[person_id] = [{"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)}] + if i % 2 == 0: + events_by_person[person_id].append( + { + "event": "negatively_related", + "timestamp": datetime(2020, 1, 3, 14), + } + ) + failure_target_persons.append(str(person.uuid)) + + # One positively_related as failure + person_fail_id = f"user_fail" + person_fail = _create_person(distinct_ids=[person_fail_id], team_id=self.team.pk) + events_by_person[person_fail_id] = [ + {"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)}, + {"event": "positively_related", "timestamp": datetime(2020, 1, 3, 14)}, + ] + + # One negatively_related as success + person_success_id = f"user_succ" + person_succ = _create_person(distinct_ids=[person_success_id], team_id=self.team.pk) + events_by_person[person_success_id] = [ + {"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)}, + {"event": "negatively_related", "timestamp": datetime(2020, 1, 3, 14)}, + {"event": "paid", "timestamp": datetime(2020, 1, 4, 14)}, + ] + journeys_for(events_by_person, self.team, create_people=False) + + return ( + filters, + success_target_persons, + failure_target_persons, + person_fail, + person_succ, + ) + + def test_basic_funnel_correlation_with_events(self): + ( + filters, + success_target_persons, + failure_target_persons, + person_fail, + person_succ, + ) = self._setup_basic_test() + + # test positively_related successes + serialized_actors = get_actors( + filters, + self.team, + funnelCorrelationPersonConverted=True, + funnelCorrelationPersonEntity=EventsNode(event="positively_related"), + ) + + self.assertCountEqual([str(val[1]["id"]) for val in serialized_actors], success_target_persons) + + # test negatively_related failures + serialized_actors = get_actors( + filters, + self.team, + funnelCorrelationPersonConverted=False, + funnelCorrelationPersonEntity=EventsNode(event="negatively_related"), + ) + + self.assertCountEqual([str(val[1]["id"]) for val in serialized_actors], failure_target_persons) + + # test positively_related failures + serialized_actors = get_actors( + filters, + self.team, + funnelCorrelationPersonConverted=False, + funnelCorrelationPersonEntity=EventsNode(event="positively_related"), + ) + + self.assertCountEqual([str(val[1]["id"]) for val in serialized_actors], [str(person_fail.uuid)]) + + # test negatively_related successes + serialized_actors = get_actors( + filters, + self.team, + funnelCorrelationPersonConverted=True, + funnelCorrelationPersonEntity=EventsNode(event="negatively_related"), + ) + + self.assertCountEqual([str(val[1]["id"]) for val in serialized_actors], [str(person_succ.uuid)]) + + # test all positively_related + serialized_actors = get_actors( + filters, + self.team, + funnelCorrelationPersonConverted=None, + funnelCorrelationPersonEntity=EventsNode(event="positively_related"), + ) + + self.assertCountEqual( + [str(val[1]["id"]) for val in serialized_actors], + [*success_target_persons, str(person_fail.uuid)], + ) + + # test all negatively_related + serialized_actors = get_actors( + filters, + self.team, + funnelCorrelationPersonConverted=None, + funnelCorrelationPersonEntity=EventsNode(event="negatively_related"), + ) + + self.assertCountEqual( + [str(val[1]["id"]) for val in serialized_actors], + [*failure_target_persons, str(person_succ.uuid)], + ) + + # @patch("posthog.tasks.calculate_cohort.insert_cohort_from_insight_filter.delay") + # def test_create_funnel_correlation_cohort(self, _insert_cohort_from_insight_filter): + # ( + # filter, + # success_target_persons, + # failure_target_persons, + # person_fail, + # person_succ, + # ) = self._setup_basic_test() + + # params = { + # "events": [ + # {"id": "user signed up", "type": "events", "order": 0}, + # {"id": "paid", "type": "events", "order": 1}, + # ], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-14", + # "funnel_correlation_type": "events", + # "funnel_correlation_person_entity": { + # "id": "positively_related", + # "type": "events", + # }, + # "funnel_correlation_person_converted": "TrUe", + # } + + # response = self.client.post( + # f"/api/projects/{self.team.id}/cohorts/?{urllib.parse.urlencode(params)}", + # {"name": "test", "is_static": True}, + # ).json() + + # cohort_id = response["id"] + + # _insert_cohort_from_insight_filter.assert_called_once_with( + # cohort_id, + # { + # "events": "[{'id': 'user signed up', 'type': 'events', 'order': 0}, {'id': 'paid', 'type': 'events', 'order': 1}]", + # "insight": "FUNNELS", + # "date_from": "2020-01-01", + # "date_to": "2020-01-14", + # "funnel_correlation_type": "events", + # "funnel_correlation_person_entity": "{'id': 'positively_related', 'type': 'events'}", + # "funnel_correlation_person_converted": "TrUe", + # }, + # ) + + # insert_cohort_from_insight_filter(cohort_id, params) + + # cohort = Cohort.objects.get(pk=cohort_id) + # people = Person.objects.filter(cohort__id=cohort.pk) + # self.assertEqual(cohort.errors_calculating, 0) + # self.assertEqual(people.count(), 5) + # self.assertEqual(cohort.count, 5) + + def test_people_arent_returned_multiple_times(self): + people = journeys_for( + { + "user_1": [ + {"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)}, + { + "event": "positively_related", + "timestamp": datetime(2020, 1, 3, 14), + }, + # duplicate event + { + "event": "positively_related", + "timestamp": datetime(2020, 1, 3, 14), + }, + {"event": "paid", "timestamp": datetime(2020, 1, 4, 14)}, + ] + }, + self.team, + ) + + filters = { + "events": [ + {"id": "user signed up", "type": "events", "order": 0}, + {"id": "paid", "type": "events", "order": 1}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-14", + } + + serialized_actors = get_actors( + filters, + self.team, + funnelCorrelationPersonConverted=True, + funnelCorrelationPersonEntity=EventsNode(event="positively_related"), + ) + + self.assertCountEqual([str(val[1]["id"]) for val in serialized_actors], [str(people["user_1"].uuid)]) + + @snapshot_clickhouse_queries + @freeze_time("2021-01-02 00:00:00.000Z") + def test_funnel_correlation_on_event_with_recordings(self): + p1 = _create_person(distinct_ids=["user_1"], team=self.team, properties={"foo": "bar"}) + _create_event( + event="$pageview", + distinct_id="user_1", + team=self.team, + timestamp=timezone.now(), + properties={"$session_id": "s2", "$window_id": "w1"}, + event_uuid="11111111-1111-1111-1111-111111111111", + ) + _create_event( + event="insight loaded", + distinct_id="user_1", + team=self.team, + timestamp=(timezone.now() + timedelta(minutes=2)), + properties={"$session_id": "s2", "$window_id": "w2"}, + event_uuid="31111111-1111-1111-1111-111111111111", + ) + _create_event( + event="insight analyzed", + distinct_id="user_1", + team=self.team, + timestamp=(timezone.now() + timedelta(minutes=3)), + properties={"$session_id": "s2", "$window_id": "w2"}, + event_uuid="21111111-1111-1111-1111-111111111111", + ) + + timestamp = datetime(2021, 1, 2, 0, 0, 0) + produce_replay_summary( + team_id=self.team.pk, + session_id="s2", + distinct_id="user_1", + first_timestamp=timestamp, + last_timestamp=timestamp, + ) + + # Success filter + filters = { + "insight": INSIGHT_FUNNELS, + "date_from": "2021-01-01", + "date_to": "2021-01-08", + "events": [ + {"id": "$pageview", "order": 0}, + {"id": "insight analyzed", "order": 1}, + ], + } + + results = get_actors( + filters, + self.team, + funnelCorrelationPersonConverted=True, + funnelCorrelationPersonEntity=EventsNode(event="insight loaded"), + ) + + self.assertEqual(results[0][1]["id"], p1.uuid) + self.assertEqual( + list(results[0][2]), + [ + { + "events": [ + { + "timestamp": timezone.now() + timedelta(minutes=3), + "uuid": UUID("21111111-1111-1111-1111-111111111111"), + "window_id": "w2", + } + ], + "session_id": "s2", + } + ], + ) + + # Drop off filter + filters = { + "insight": INSIGHT_FUNNELS, + "date_from": "2021-01-01", + "date_to": "2021-01-08", + "funnel_correlation_type": "events", + "events": [ + {"id": "$pageview", "order": 0}, + {"id": "insight analyzed", "order": 1}, + {"id": "insight updated", "order": 2}, + ], + } + results = get_actors( + filters, + self.team, + funnelCorrelationPersonConverted=False, + funnelCorrelationPersonEntity=EventsNode(event="insight loaded"), + ) + + self.assertEqual(results[0][1]["id"], p1.uuid) + self.assertEqual( + list(results[0][2]), + [ + { + "events": [ + { + "timestamp": timezone.now() + timedelta(minutes=3), + "uuid": UUID("21111111-1111-1111-1111-111111111111"), + "window_id": "w2", + } + ], + "session_id": "s2", + } + ], + ) + + @snapshot_clickhouse_queries + @freeze_time("2021-01-02 00:00:00.000Z") + def test_funnel_correlation_on_properties_with_recordings(self): + p1 = _create_person(distinct_ids=["user_1"], team=self.team, properties={"foo": "bar"}) + _create_event( + event="$pageview", + distinct_id="user_1", + team=self.team, + timestamp=timezone.now(), + properties={"$session_id": "s2", "$window_id": "w1"}, + event_uuid="11111111-1111-1111-1111-111111111111", + ) + _create_event( + event="insight analyzed", + distinct_id="user_1", + team=self.team, + timestamp=(timezone.now() + timedelta(minutes=3)), + properties={"$session_id": "s2", "$window_id": "w2"}, + event_uuid="21111111-1111-1111-1111-111111111111", + ) + + timestamp = datetime(2021, 1, 2, 0, 0, 0) + produce_replay_summary( + team_id=self.team.pk, + session_id="s2", + distinct_id="user_1", + first_timestamp=timestamp, + last_timestamp=timestamp, + ) + + # Success filter + filters = { + "insight": INSIGHT_FUNNELS, + "date_from": "2021-01-01", + "date_to": "2021-01-08", + "events": [ + {"id": "$pageview", "order": 0}, + {"id": "insight analyzed", "order": 1}, + ], + } + results = get_actors( + filters, + self.team, + funnelCorrelationType=FunnelCorrelationResultsType.properties, + funnelCorrelationPersonConverted=True, + funnelCorrelationPropertyValues=[ + { + "key": "foo", + "value": "bar", + "operator": "exact", + "type": "person", + } + ], + ) + + self.assertEqual(results[0][1]["id"], p1.uuid) + self.assertEqual( + list(results[0][2]), + [ + { + "events": [ + { + "timestamp": timezone.now() + timedelta(minutes=3), + "uuid": UUID("21111111-1111-1111-1111-111111111111"), + "window_id": "w2", + } + ], + "session_id": "s2", + } + ], + ) + + @snapshot_clickhouse_queries + @freeze_time("2021-01-02 00:00:00.000Z") + def test_strict_funnel_correlation_with_recordings(self): + # First use that successfully completes the strict funnel + p1 = _create_person(distinct_ids=["user_1"], team=self.team, properties={"foo": "bar"}) + _create_event( + event="$pageview", + distinct_id="user_1", + team=self.team, + timestamp=timezone.now(), + properties={"$session_id": "s2", "$window_id": "w1"}, + event_uuid="11111111-1111-1111-1111-111111111111", + ) + _create_event( + event="insight analyzed", + distinct_id="user_1", + team=self.team, + timestamp=(timezone.now() + timedelta(minutes=3)), + properties={"$session_id": "s2", "$window_id": "w2"}, + event_uuid="31111111-1111-1111-1111-111111111111", + ) + _create_event( + event="insight analyzed", # Second event should not be returned + distinct_id="user_1", + team=self.team, + timestamp=(timezone.now() + timedelta(minutes=4)), + properties={"$session_id": "s2", "$window_id": "w2"}, + event_uuid="41111111-1111-1111-1111-111111111111", + ) + timestamp = datetime(2021, 1, 2, 0, 0, 0) + produce_replay_summary( + team_id=self.team.pk, + session_id="s2", + distinct_id="user_1", + first_timestamp=timestamp, + last_timestamp=timestamp, + ) + + # Second user with strict funnel drop off, but completed the step events for a normal funnel + p2 = _create_person(distinct_ids=["user_2"], team=self.team, properties={"foo": "bar"}) + _create_event( + event="$pageview", + distinct_id="user_2", + team=self.team, + timestamp=timezone.now(), + properties={"$session_id": "s3", "$window_id": "w1"}, + event_uuid="51111111-1111-1111-1111-111111111111", + ) + _create_event( + event="insight loaded", # Interupting event + distinct_id="user_2", + team=self.team, + timestamp=(timezone.now() + timedelta(minutes=3)), + properties={"$session_id": "s3", "$window_id": "w2"}, + event_uuid="61111111-1111-1111-1111-111111111111", + ) + _create_event( + event="insight analyzed", + distinct_id="user_2", + team=self.team, + timestamp=(timezone.now() + timedelta(minutes=4)), + properties={"$session_id": "s3", "$window_id": "w2"}, + event_uuid="71111111-1111-1111-1111-111111111111", + ) + timestamp1 = datetime(2021, 1, 2, 0, 0, 0) + produce_replay_summary( + team_id=self.team.pk, + session_id="s3", + distinct_id="user_2", + first_timestamp=timestamp1, + last_timestamp=timestamp1, + ) + + # Success filter + filters = { + "insight": INSIGHT_FUNNELS, + "date_from": "2021-01-01", + "date_to": "2021-01-08", + "funnel_order_type": "strict", + "events": [ + {"id": "$pageview", "order": 0}, + {"id": "insight analyzed", "order": 1}, + ], + } + + results = get_actors( + filters, + self.team, + funnelCorrelationType=FunnelCorrelationResultsType.properties, + funnelCorrelationPersonConverted=True, + funnelCorrelationPropertyValues=[ + { + "key": "foo", + "value": "bar", + "operator": "exact", + "type": "person", + } + ], + ) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0][1]["id"], p1.uuid) + self.assertEqual( + list(results[0][2]), + [ + { + "events": [ + { + "timestamp": timezone.now() + timedelta(minutes=3), + "uuid": UUID("31111111-1111-1111-1111-111111111111"), + "window_id": "w2", + } + ], + "session_id": "s2", + } + ], + ) + + # Drop off filter + results = get_actors( + filters, + self.team, + funnelCorrelationType=FunnelCorrelationResultsType.properties, + funnelCorrelationPersonConverted=False, + funnelCorrelationPropertyValues=[ + { + "key": "foo", + "value": "bar", + "operator": "exact", + "type": "person", + } + ], + ) + + self.assertEqual(results[0][1]["id"], p2.uuid) + self.assertEqual( + list(results[0][2]), + [ + { + "events": [ + { + "timestamp": timezone.now(), + "uuid": UUID("51111111-1111-1111-1111-111111111111"), + "window_id": "w1", + } + ], + "session_id": "s3", + } + ], + ) diff --git a/posthog/hogql_queries/insights/funnels/utils.py b/posthog/hogql_queries/insights/funnels/utils.py index ff7a52db0a1f1..316b33886efb3 100644 --- a/posthog/hogql_queries/insights/funnels/utils.py +++ b/posthog/hogql_queries/insights/funnels/utils.py @@ -5,8 +5,6 @@ from posthog.schema import FunnelConversionWindowTimeUnit, FunnelVizType, FunnelsFilter, StepOrderValue from rest_framework.exceptions import ValidationError -from posthog.settings.ee import EE_AVAILABLE - def get_funnel_order_class(funnelsFilter: FunnelsFilter): from posthog.hogql_queries.insights.funnels import ( @@ -30,20 +28,7 @@ def get_funnel_actor_class(funnelsFilter: FunnelsFilter): FunnelTrendsActors, ) - # if filter.correlation_person_entity and EE_AVAILABLE: - if False: - if EE_AVAILABLE: # type: ignore - # from ee.clickhouse.queries.funnels.funnel_correlation_persons import ( - # FunnelCorrelationActors, - # ) - - return FunnelActors - # return FunnelCorrelationActors - else: - raise ValueError( - "Funnel Correlations is not available without an enterprise license and enterprise supported deployment" - ) - elif funnelsFilter.funnelVizType == FunnelVizType.trends: + if funnelsFilter.funnelVizType == FunnelVizType.trends: return FunnelTrendsActors else: if funnelsFilter.funnelOrderType == StepOrderValue.unordered: diff --git a/posthog/hogql_queries/insights/insight_actors_query_runner.py b/posthog/hogql_queries/insights/insight_actors_query_runner.py index 57b1c8630c45f..782dd5b054a0e 100644 --- a/posthog/hogql_queries/insights/insight_actors_query_runner.py +++ b/posthog/hogql_queries/insights/insight_actors_query_runner.py @@ -3,6 +3,7 @@ from posthog.hogql import ast from posthog.hogql.query import execute_hogql_query +from posthog.hogql_queries.insights.funnels.funnel_correlation_query_runner import FunnelCorrelationQueryRunner from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner from posthog.hogql_queries.insights.lifecycle_query_runner import LifecycleQueryRunner from posthog.hogql_queries.insights.paths_query_runner import PathsQueryRunner @@ -11,7 +12,15 @@ from posthog.hogql_queries.insights.trends.trends_query_runner import TrendsQueryRunner from posthog.hogql_queries.query_runner import QueryRunner, get_query_runner from posthog.models.filters.mixins.utils import cached_property -from posthog.schema import FunnelsActorsQuery, InsightActorsQuery, HogQLQueryResponse, StickinessQuery, TrendsQuery +from posthog.schema import ( + FunnelCorrelationActorsQuery, + FunnelCorrelationQuery, + FunnelsActorsQuery, + InsightActorsQuery, + HogQLQueryResponse, + StickinessQuery, + TrendsQuery, +) from posthog.types import InsightActorsQueryNode @@ -37,6 +46,11 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: funnels_runner = cast(FunnelsQueryRunner, self.source_runner) funnels_runner.context.actorsQuery = cast(FunnelsActorsQuery, self.query) return funnels_runner.to_actors_query() + elif isinstance(self.source_runner, FunnelCorrelationQueryRunner): + funnel_correlation_runner = cast(FunnelCorrelationQueryRunner, self.source_runner) + assert isinstance(self.query, FunnelCorrelationActorsQuery) + funnel_correlation_runner.correlation_actors_query = self.query + return funnel_correlation_runner.to_actors_query() elif isinstance(self.source_runner, RetentionQueryRunner): query = cast(InsightActorsQuery, self.query) retention_runner = cast(RetentionQueryRunner, self.source_runner) @@ -65,6 +79,11 @@ def group_type_index(self) -> int | None: if isinstance(self.source_runner, RetentionQueryRunner): return cast(RetentionQueryRunner, self.source_runner).group_type_index + if isinstance(self.source_runner, FunnelCorrelationQueryRunner): + assert isinstance(self.query, FunnelCorrelationActorsQuery) + assert isinstance(self.query.source, FunnelCorrelationQuery) + return self.query.source.source.source.aggregation_group_type_index + if ( isinstance(self.source_runner, StickinessQueryRunner) and isinstance(self.query.source, StickinessQuery) ) or (isinstance(self.source_runner, TrendsQueryRunner) and isinstance(self.query.source, TrendsQuery)): diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py index 3e51cafeae97b..3cd3d1e68bf9a 100644 --- a/posthog/hogql_queries/query_runner.py +++ b/posthog/hogql_queries/query_runner.py @@ -17,6 +17,7 @@ from posthog.metrics import LABEL_TEAM_ID from posthog.models import Team from posthog.schema import ( + FunnelCorrelationActorsQuery, FunnelCorrelationQuery, FunnelsActorsQuery, TrendsQuery, @@ -95,6 +96,7 @@ class CachedQueryResponse(QueryResponse): InsightActorsQuery, FunnelsActorsQuery, FunnelCorrelationQuery, + FunnelCorrelationActorsQuery, InsightActorsQueryOptions, SessionsTimelineQuery, WebOverviewQuery, @@ -198,11 +200,11 @@ def get_query_runner( limit_context=limit_context, modifiers=modifiers, ) - if kind == "InsightActorsQuery": + if kind == "InsightActorsQuery" or kind == "FunnelsActorsQuery" or kind == "FunnelCorrelationActorsQuery": from .insights.insight_actors_query_runner import InsightActorsQueryRunner return InsightActorsQueryRunner( - query=cast(InsightActorsQuery | FunnelsActorsQuery | Dict[str, Any], query), + query=cast(InsightActorsQuery | Dict[str, Any], query), team=team, timings=timings, limit_context=limit_context, diff --git a/posthog/schema.py b/posthog/schema.py index 7c770790364ce..d82c6c21b48ff 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -538,6 +538,8 @@ class NodeKind(str, Enum): HogQLMetadata = "HogQLMetadata" HogQLAutocomplete = "HogQLAutocomplete" ActorsQuery = "ActorsQuery" + FunnelsActorsQuery = "FunnelsActorsQuery" + FunnelCorrelationActorsQuery = "FunnelCorrelationActorsQuery" SessionsTimelineQuery = "SessionsTimelineQuery" DataTableNode = "DataTableNode" DataVisualizationNode = "DataVisualizationNode" @@ -1182,7 +1184,7 @@ class FunnelCorrelationResponse(BaseModel): offset: Optional[int] = None results: FunnelCorrelationResult timings: Optional[List[QueryTiming]] = None - types: Optional[List[str]] = None + types: Optional[List] = None class FunnelsFilterLegacy(BaseModel): @@ -1513,7 +1515,7 @@ class QueryResponseAlternative17(BaseModel): offset: Optional[int] = None results: FunnelCorrelationResult timings: Optional[List[QueryTiming]] = None - types: Optional[List[str]] = None + types: Optional[List] = None class RetentionFilter(BaseModel): @@ -2738,7 +2740,7 @@ class FunnelsActorsQuery(BaseModel): description="Used together with `funnelTrendsDropOff` for funnels time conversion date for the persons modal.", ) includeRecordings: Optional[bool] = None - kind: Literal["InsightActorsQuery"] = "InsightActorsQuery" + kind: Literal["FunnelsActorsQuery"] = "FunnelsActorsQuery" response: Optional[ActorsQueryResponse] = None source: FunnelsQuery @@ -2802,13 +2804,42 @@ class InsightActorsQuery(BaseModel): status: Optional[str] = None +class FunnelCorrelationActorsQuery(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + funnelCorrelationPersonConverted: Optional[bool] = None + funnelCorrelationPersonEntity: Optional[Union[EventsNode, ActionsNode, DataWarehouseNode]] = None + funnelCorrelationPropertyValues: Optional[ + List[ + Union[ + EventPropertyFilter, + PersonPropertyFilter, + ElementPropertyFilter, + SessionPropertyFilter, + CohortPropertyFilter, + RecordingDurationFilter, + GroupPropertyFilter, + FeaturePropertyFilter, + HogQLPropertyFilter, + EmptyPropertyFilter, + DataWarehousePropertyFilter, + ] + ] + ] = None + includeRecordings: Optional[bool] = None + kind: Literal["FunnelCorrelationActorsQuery"] = "FunnelCorrelationActorsQuery" + response: Optional[ActorsQueryResponse] = None + source: FunnelCorrelationQuery + + class InsightActorsQueryOptions(BaseModel): model_config = ConfigDict( extra="forbid", ) kind: Literal["InsightActorsQueryOptions"] = "InsightActorsQueryOptions" response: Optional[InsightActorsQueryOptionsResponse] = None - source: Union[InsightActorsQuery, FunnelsActorsQuery] + source: Union[InsightActorsQuery, FunnelsActorsQuery, FunnelCorrelationActorsQuery] class ActorsQuery(BaseModel): @@ -2856,7 +2887,7 @@ class ActorsQuery(BaseModel): response: Optional[ActorsQueryResponse] = Field(default=None, description="Cached query response") search: Optional[str] = None select: Optional[List[str]] = None - source: Optional[Union[InsightActorsQuery, FunnelsActorsQuery, HogQLQuery]] = None + source: Optional[Union[InsightActorsQuery, FunnelsActorsQuery, FunnelCorrelationActorsQuery, HogQLQuery]] = None class DataTableNode(BaseModel): diff --git a/posthog/types.py b/posthog/types.py index c3fa98e0f3ea1..3b434f16417ac 100644 --- a/posthog/types.py +++ b/posthog/types.py @@ -14,6 +14,7 @@ EventsNode, DataWarehousePropertyFilter, FeaturePropertyFilter, + FunnelCorrelationActorsQuery, FunnelExclusionActionsNode, FunnelExclusionEventsNode, FunnelsActorsQuery, @@ -43,7 +44,7 @@ LifecycleQuery, ] -InsightActorsQueryNode: TypeAlias = Union[InsightActorsQuery, FunnelsActorsQuery] +InsightActorsQueryNode: TypeAlias = Union[InsightActorsQuery, FunnelsActorsQuery, FunnelCorrelationActorsQuery] AnyPropertyFilter: TypeAlias = Union[ EventPropertyFilter,