diff --git a/posthog/hogql/printer.py b/posthog/hogql/printer.py index 74341e76b6839..93a8ec42bdc4e 100644 --- a/posthog/hogql/printer.py +++ b/posthog/hogql/printer.py @@ -1075,8 +1075,10 @@ def visit_window_expr(self, node: ast.WindowExpr): if len(node.partition_by) == 0: raise HogQLException("PARTITION BY must have at least one argument") strings.append("PARTITION BY") + columns = [] for expr in node.partition_by: - strings.append(self.visit(expr)) + columns.append(self.visit(expr)) + strings.append(", ".join(columns)) if node.order_by is not None: if len(node.order_by) == 0: diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index 3278f0c236f77..f962d03b23ac9 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -1,19 +1,35 @@ from abc import ABC +from functools import cached_property from typing import Any, Dict, List, Optional, Tuple, cast import uuid from posthog.clickhouse.materialized_columns.column import ColumnName +from posthog.constants import BREAKDOWN_VALUES_LIMIT from posthog.hogql import ast -from posthog.hogql.parser import parse_expr +from posthog.hogql.parser import parse_expr, parse_select from posthog.hogql.property import action_to_expr, property_to_expr +from posthog.hogql.query import execute_hogql_query from posthog.hogql_queries.insights.funnels.funnel_event_query import FunnelEventQuery from posthog.hogql_queries.insights.funnels.funnel_query_context import FunnelQueryContext -from posthog.hogql_queries.insights.funnels.utils import funnel_window_interval_unit_to_sql +from posthog.hogql_queries.insights.funnels.utils import ( + funnel_window_interval_unit_to_sql, + get_breakdown_expr, +) from posthog.hogql_queries.insights.utils.entities import is_equal, is_superset from posthog.models.action.action import Action +from posthog.models.cohort.cohort import Cohort from posthog.models.property.property import PropertyName 
from posthog.queries.util import correct_result_for_sampling -from posthog.schema import ActionsNode, EventsNode, FunnelExclusionActionsNode +from posthog.queries.breakdown_props import ALL_USERS_COHORT_ID, get_breakdown_cohort_name +from posthog.schema import ( + ActionsNode, + BreakdownAttributionType, + BreakdownType, + EventsNode, + FunnelExclusionActionsNode, + StepOrderValue, +) from posthog.types import EntityNode, ExclusionEntityNode +from rest_framework.exceptions import ValidationError class FunnelBase(ABC): @@ -45,13 +61,217 @@ def get_step_counts_query(self) -> str: def get_step_counts_without_aggregation_query(self) -> str: raise NotImplementedError() - def _format_results(self, results) -> List[Dict[str, Any]]: - breakdownFilter = self.context.breakdownFilter + @cached_property + def breakdown_cohorts(self) -> List[Cohort]: + team, breakdown = self.context.team, self.context.breakdown + + if isinstance(breakdown, list): + cohorts = Cohort.objects.filter(team_id=team.pk, pk__in=[b for b in breakdown if b != "all"]) + else: + cohorts = Cohort.objects.filter(team_id=team.pk, pk=breakdown) + + return list(cohorts) + + @cached_property + def breakdown_cohorts_ids(self) -> List[int]: + breakdown = self.context.breakdown + + ids = [int(cohort.pk) for cohort in self.breakdown_cohorts] + + if isinstance(breakdown, list) and "all" in breakdown: + ids.append(ALL_USERS_COHORT_ID) + + return ids + + @cached_property + def breakdown_values(self) -> List[int] | List[str] | List[List[str]]: + # """ + # Returns the top N breakdown prop values for event/person breakdown + + # e.g. 
for Browser with limit 3 might return ['Chrome', 'Safari', 'Firefox', 'Other'] + # """ + team, query, funnelsFilter, breakdownType, breakdownFilter, breakdownAttributionType = ( + self.context.team, + self.context.query, + self.context.funnelsFilter, + self.context.breakdownType, + self.context.breakdownFilter, + self.context.breakdownAttributionType, + ) + + use_all_funnel_entities = ( + breakdownAttributionType + in [ + BreakdownAttributionType.first_touch, + BreakdownAttributionType.last_touch, + ] + or funnelsFilter.funnelOrderType == StepOrderValue.unordered + ) + first_entity = query.series[0] + target_entity = first_entity + if breakdownAttributionType == BreakdownAttributionType.step: + assert isinstance(funnelsFilter.breakdownAttributionValue, int) + target_entity = query.series[funnelsFilter.breakdownAttributionValue] + + if breakdownType == "cohort": + return self.breakdown_cohorts_ids + else: + # get query params + breakdown_expr = self._get_breakdown_expr() + breakdown_limit_or_default = breakdownFilter.breakdown_limit or BREAKDOWN_VALUES_LIMIT + offset = 0 + + funnel_event_query = FunnelEventQuery(context=self.context) + + if use_all_funnel_entities: + entity_expr = funnel_event_query._entity_expr(skip_entity_filter=False) + prop_exprs = funnel_event_query._properties_expr() + else: + entity_expr = None + # TODO implement for strict and ordered funnels + # entity_params, entity_format_params = get_entity_filtering_params( + # allowed_entities=[target_entity], + # team_id=team.pk, + # table_name="e", + # person_id_joined_alias=person_id_joined_alias, + # person_properties_mode=person_properties_mode, + # hogql_context=filter.hogql_context, + # ) + + if target_entity.properties: + prop_exprs = [property_to_expr(target_entity.properties, team)] + else: + prop_exprs = [] + + where_exprs: List[ast.Expr | None] = [ + # entity filter + entity_expr, + # prop filter + *prop_exprs, + # date range filter + funnel_event_query._date_range_expr(), + # null persons 
filter + parse_expr("notEmpty(e.person_id)"), + ] + + # build query + values_query = ast.SelectQuery( + select=[ast.Alias(alias="value", expr=breakdown_expr), parse_expr("count(*) as count")], + select_from=ast.JoinExpr( + table=ast.Field(chain=["events"]), + alias="e", + ), + where=ast.And(exprs=[expr for expr in where_exprs if expr is not None]), + group_by=[ast.Field(chain=["value"])], + order_by=[ + ast.OrderExpr(expr=ast.Field(chain=["count"]), order="DESC"), + ast.OrderExpr(expr=ast.Field(chain=["value"]), order="DESC"), + ], + limit=ast.Constant(value=breakdown_limit_or_default + 1), + offset=ast.Constant(value=offset), + ) + + if query.samplingFactor is not None: + assert isinstance(values_query.select_from, ast.JoinExpr) + values_query.select_from.sample = ast.SampleExpr( + sample_value=ast.RatioExpr(left=ast.Constant(value=query.samplingFactor)) + ) + + # execute query + results = execute_hogql_query(values_query, self.context.team).results + if results is None: + raise ValidationError("Apologies, there has been an error computing breakdown values.") + return [row[0] for row in results[0:breakdown_limit_or_default]] + + def _get_breakdown_select_prop(self) -> List[ast.Expr]: + breakdown, breakdownAttributionType, funnelsFilter = ( + self.context.breakdown, + self.context.breakdownAttributionType, + self.context.funnelsFilter, + ) + + if not breakdown: + return [] + + # breakdown prop + prop_basic = ast.Alias(alias="prop_basic", expr=self._get_breakdown_expr()) + + # breakdown attribution + if breakdownAttributionType == BreakdownAttributionType.step: + select_columns = [] + default_breakdown_selector = "[]" if self._query_has_array_breakdown() else "NULL" + # get prop value from each step + for index, _ in enumerate(self.context.query.series): + select_columns.append( + parse_expr(f"if(step_{index} = 1, prop_basic, {default_breakdown_selector}) as prop_{index}") + ) + + final_select = parse_expr(f"prop_{funnelsFilter.breakdownAttributionValue} as prop") + 
prop_window = parse_expr("groupUniqArray(prop) over (PARTITION by aggregation_target) as prop_vals") + + return [prop_basic, *select_columns, final_select, prop_window] + elif breakdownAttributionType in [ + BreakdownAttributionType.first_touch, + BreakdownAttributionType.last_touch, + ]: + prop_conditional = ( + "notEmpty(arrayFilter(x -> notEmpty(x), prop))" + if self._query_has_array_breakdown() + else "isNotNull(prop)" + ) + + aggregate_operation = ( + "argMinIf" if breakdownAttributionType == BreakdownAttributionType.first_touch else "argMaxIf" + ) + + breakdown_window_selector = f"{aggregate_operation}(prop, timestamp, {prop_conditional})" + prop_window = parse_expr(f"{breakdown_window_selector} over (PARTITION by aggregation_target) as prop_vals") + return [ + prop_basic, + ast.Alias(alias="prop", expr=ast.Field(chain=["prop_basic"])), + prop_window, + ] + else: + # all_events + return [ + prop_basic, + ast.Alias(alias="prop", expr=ast.Field(chain=["prop_basic"])), + ] + + def _get_breakdown_expr(self) -> ast.Expr: + breakdown, breakdownType, breakdownFilter = ( + self.context.breakdown, + self.context.breakdownType, + self.context.breakdownFilter, + ) + + if breakdownType == "person": + properties_column = "person.properties" + return get_breakdown_expr(breakdown, properties_column) + elif breakdownType == "event": + properties_column = "properties" + normalize_url = breakdownFilter.breakdown_normalize_url + return get_breakdown_expr(breakdown, properties_column, normalize_url=normalize_url) + elif breakdownType == "cohort": + return ast.Field(chain=["value"]) + elif breakdownType == "group": + properties_column = f"group_{breakdownFilter.breakdown_group_type_index}.properties" + return get_breakdown_expr(breakdown, properties_column) + elif breakdownType == "hogql": + return ast.Alias( + alias="value", + expr=parse_expr(str(breakdown)), + ) + else: + raise ValidationError(detail=f"Unsupported breakdown type: {breakdownType}") + + def _format_results(self, 
results) -> List[Dict[str, Any]] | List[List[Dict[str, Any]]]: + breakdown = self.context.breakdown if not results or len(results) == 0: return [] - if breakdownFilter.breakdown: + if breakdown: return [self._format_single_funnel(res, with_breakdown=True) for res in results] else: return self._format_single_funnel(results[0]) @@ -63,7 +283,7 @@ def _format_single_funnel(self, results, with_breakdown=False): steps = [] total_people = 0 - # breakdown_value = results[-1] + breakdown_value = results[-1] # cache_invalidation_key = generate_short_id() for index, step in enumerate(reversed(self.context.query.series)): @@ -91,25 +311,27 @@ def _format_single_funnel(self, results, with_breakdown=False): # converted_people_filter = self._filter.shallow_clone({"funnel_step": funnel_step}) # dropped_people_filter = self._filter.shallow_clone({"funnel_step": -funnel_step}) - # if with_breakdown: - # # breakdown will return a display ready value - # # breakdown_value will return the underlying id if different from display ready value (ex: cohort id) - # serialized_result.update( - # { - # "breakdown": get_breakdown_cohort_name(breakdown_value) - # if self._filter.breakdown_type == "cohort" - # else breakdown_value, - # "breakdown_value": breakdown_value, - # } - # ) - # # important to not try and modify this value any how - as these - # # are keys for fetching persons - - # # Add in the breakdown to people urls as well - # converted_people_filter = converted_people_filter.shallow_clone( - # {"funnel_step_breakdown": breakdown_value} - # ) - # dropped_people_filter = dropped_people_filter.shallow_clone({"funnel_step_breakdown": breakdown_value}) + if with_breakdown: + # breakdown will return a display ready value + # breakdown_value will return the underlying id if different from display ready value (ex: cohort id) + serialized_result.update( + { + "breakdown": ( + get_breakdown_cohort_name(breakdown_value) + if self.context.breakdownFilter.breakdown_type == "cohort" + else 
breakdown_value + ), + "breakdown_value": breakdown_value, + } + ) + # important to not try and modify this value any how - as these + # are keys for fetching persons + + # # Add in the breakdown to people urls as well + # converted_people_filter = converted_people_filter.shallow_clone( + # {"funnel_step_breakdown": breakdown_value} + # ) + # dropped_people_filter = dropped_people_filter.shallow_clone({"funnel_step_breakdown": breakdown_value}) # serialized_result.update( # { @@ -144,7 +366,7 @@ def _serialize_step( name = action.name return { - "action_id": step.event if isinstance(step, EventsNode) else str(step.id), + "action_id": step.event if isinstance(step, EventsNode) else step.id, "name": name, "custom_name": step.custom_name, "order": index, @@ -164,7 +386,13 @@ def _get_inner_event_query( skip_entity_filter=False, skip_step_filter=False, ) -> ast.SelectQuery: - query, funnelsFilter = self.context.query, self.context.funnelsFilter + query, funnelsFilter, breakdown, breakdownType, breakdownAttributionType = ( + self.context.query, + self.context.funnelsFilter, + self.context.breakdown, + self.context.breakdownType, + self.context.breakdownAttributionType, + ) entities_to_use = entities or query.series # extra_fields = [] @@ -178,11 +406,6 @@ def _get_inner_event_query( # extra_event_properties=self._extra_event_properties, # ).get_query(entities_to_use, entity_name, skip_entity_filter=skip_entity_filter) - # if skip_step_filter: - # steps_conditions = "1=1" - # else: - # steps_conditions = self._get_steps_conditions(length=len(entities_to_use)) - all_step_cols: List[ast.Expr] = [] for index, entity in enumerate(entities_to_use): step_cols = self._get_step_col(entity, index, entity_name) @@ -194,43 +417,115 @@ def _get_inner_event_query( # where i is the starting step for exclusion on that entity all_step_cols.extend(step_cols) - # breakdown_select_prop, breakdown_select_prop_params = self._get_breakdown_select_prop() - - # if breakdown_select_prop: - # 
all_step_cols.append(breakdown_select_prop) + breakdown_select_prop = self._get_breakdown_select_prop() - # extra_join = "" - - # if self._filter.breakdown: - # if self._filter.breakdown_type == "cohort": - # extra_join = self._get_cohort_breakdown_join() - # else: - # values = self._get_breakdown_conditions() - # self.params.update({"breakdown_values": values}) + if breakdown_select_prop: + all_step_cols.extend(breakdown_select_prop) funnel_events_query.select = [*funnel_events_query.select, *all_step_cols] - # funnel_events_query = funnel_events_query.format( - # # extra_join=extra_join, - # # step_filter="AND ({})".format(steps_conditions), - # ) + if breakdown and breakdownType == BreakdownType.cohort: + if funnel_events_query.select_from is None: + raise ValidationError("Apologies, there was an error adding cohort breakdowns to the query.") + funnel_events_query.select_from.next_join = self._get_cohort_breakdown_join() + + if not skip_step_filter: + assert isinstance(funnel_events_query.where, ast.Expr) + steps_conditions = self._get_steps_conditions(length=len(entities_to_use)) + funnel_events_query.where = ast.And(exprs=[funnel_events_query.where, steps_conditions]) - # if self._filter.breakdown and self._filter.breakdown_attribution_type != BreakdownAttributionType.ALL_EVENTS: - # # ALL_EVENTS attribution is the old default, which doesn't need the subquery - # return self._add_breakdown_attribution_subquery(funnel_events_query) + if breakdown and breakdownAttributionType != BreakdownAttributionType.all_events: + # ALL_EVENTS attribution is the old default, which doesn't need the subquery + return self._add_breakdown_attribution_subquery(funnel_events_query) return funnel_events_query - # def _get_steps_conditions(self, length: int) -> str: - # step_conditions: List[str] = [] + def _get_cohort_breakdown_join(self) -> ast.JoinExpr: + breakdown = self.context.breakdown - # for index in range(length): - # step_conditions.append(f"step_{index} = 1") + 
cohort_queries: List[ast.SelectQuery] = [] - # for exclusion_id, entity in enumerate(self._filter.exclusions): - # step_conditions.append(f"exclusion_{exclusion_id}_step_{entity.funnel_from_step} = 1") + for cohort in self.breakdown_cohorts: + query = parse_select( + f"select id as cohort_person_id, {cohort.pk} as value from persons where id in cohort {cohort.pk}" + ) + assert isinstance(query, ast.SelectQuery) + cohort_queries.append(query) + + if isinstance(breakdown, list) and "all" in breakdown: + all_query = FunnelEventQuery(context=self.context).to_query() + all_query.select = [ + ast.Alias(alias="cohort_person_id", expr=ast.Field(chain=["person_id"])), + ast.Alias(alias="value", expr=ast.Constant(value=ALL_USERS_COHORT_ID)), + ] + cohort_queries.append(all_query) + + return ast.JoinExpr( + join_type="INNER JOIN", + table=ast.SelectUnionQuery(select_queries=cohort_queries), + alias="cohort_join", + constraint=ast.JoinConstraint( + expr=ast.CompareOperation( + left=ast.Field(chain=[FunnelEventQuery.EVENT_TABLE_ALIAS, "person_id"]), + right=ast.Field(chain=["cohort_join", "cohort_person_id"]), + op=ast.CompareOperationOp.Eq, + ) + ), + ) - # return " OR ".join(step_conditions) + def _add_breakdown_attribution_subquery(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: + breakdown, breakdownAttributionType = ( + self.context.breakdown, + self.context.breakdownAttributionType, + ) + + if breakdownAttributionType in [ + BreakdownAttributionType.first_touch, + BreakdownAttributionType.last_touch, + ]: + # When breaking down by first/last touch, each person can only have one prop value + # so just select that. Except for the empty case, where we select the default. 
+ + if self._query_has_array_breakdown(): + default_breakdown_value = f"""[{','.join(["''" for _ in range(len(breakdown or []))])}]""" + # default is [''] when dealing with a single breakdown array, otherwise ['', '', ...., ''] + breakdown_selector = parse_expr( + f"if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, {default_breakdown_value})" + ) + else: + breakdown_selector = ast.Field(chain=["prop_vals"]) + + return ast.SelectQuery( + select=[ast.Field(chain=["*"]), ast.Alias(alias="prop", expr=breakdown_selector)], + select_from=ast.JoinExpr(table=inner_query), + ) + + # When breaking down by specific step, each person can have multiple prop values + # so array join those to each event + query = ast.SelectQuery( + select=[ast.Field(chain=["*"]), ast.Field(chain=["prop"])], + select_from=ast.JoinExpr(table=inner_query), + array_join_op="ARRAY JOIN", + array_join_list=[ast.Alias(alias="prop", expr=ast.Field(chain=["prop_vals"]))], + ) + + if self._query_has_array_breakdown(): + query.where = ast.CompareOperation( + left=ast.Field(chain=["prop"]), right=ast.Array(exprs=[]), op=ast.CompareOperationOp.NotEq + ) + + return query + + def _get_steps_conditions(self, length: int) -> ast.Expr: + step_conditions: List[ast.Expr] = [] + + for index in range(length): + step_conditions.append(parse_expr(f"step_{index} = 1")) + + for exclusion_id, entity in enumerate(self.context.funnelsFilter.exclusions or []): + step_conditions.append(parse_expr(f"exclusion_{exclusion_id}_step_{entity.funnelFromStep} = 1")) + + return ast.Or(exprs=step_conditions) def _get_step_col( self, @@ -458,43 +753,63 @@ def _get_partition_cols(self, level_index: int, max_steps: int) -> List[ast.Expr return exprs - def _get_breakdown_expr(self, group_remaining=False) -> List[ast.Expr]: - # SEE BELOW - # if self._filter.breakdown: - # other_aggregation = "['Other']" if self._query_has_array_breakdown() else "'Other'" - # if group_remaining and self._filter.breakdown_type in [ - # 
"person", - # "event", - # "group", - # ]: - # return f", if(has(%(breakdown_values)s, prop), prop, {other_aggregation}) as prop" - # else: - # # Cohorts don't have "Other" aggregation - # return ", prop" - # else: - # return "" - return [] + def _get_breakdown_prop_expr(self, group_remaining=False) -> List[ast.Expr]: + # SEE BELOW for a string implementation of the following + breakdown, breakdownType = self.context.breakdown, self.context.breakdownType + + if breakdown: + other_aggregation = "['Other']" if self._query_has_array_breakdown() else "'Other'" + if group_remaining and breakdownType in [ + BreakdownType.person, + BreakdownType.event, + BreakdownType.group, + ]: + breakdown_values = self._get_breakdown_conditions() + return [parse_expr(f"if(has({breakdown_values}, prop), prop, {other_aggregation}) as prop")] + else: + # Cohorts don't have "Other" aggregation + return [ast.Field(chain=["prop"])] + else: + return [] def _get_breakdown_prop(self, group_remaining=False) -> str: - # SEE ABOVE - # if self._filter.breakdown: - # other_aggregation = "['Other']" if self._query_has_array_breakdown() else "'Other'" - # if group_remaining and self._filter.breakdown_type in [ - # "person", - # "event", - # "group", - # ]: - # return f", if(has(%(breakdown_values)s, prop), prop, {other_aggregation}) as prop" - # else: - # # Cohorts don't have "Other" aggregation - # return ", prop" - # else: - # return "" - return "" + # SEE ABOVE for an ast implementation of the following + breakdown = self.context.breakdown + + if breakdown: + # TODO: implement the below if group_remaining can ever be true + # breakdown_values = self._get_breakdown_conditions() + # other_aggregation = "['Other']" if self._query_has_array_breakdown() else "'Other'" + # if group_remaining and breakdownFilter.breakdown_type in [ + # BreakdownType.person, + # BreakdownType.event, + # BreakdownType.group, + # ]: + # return f", if(has({breakdown_values}, prop), prop, {other_aggregation}) as prop" + # 
else: + # # Cohorts don't have "Other" aggregation + return ", prop" + else: + return "" + + def _get_breakdown_conditions(self) -> Optional[List[int] | List[str] | List[List[str]]]: + """ + For people, pagination sets the offset param, which is common across filters + and gives us the wrong breakdown values here, so we override it. + For events, depending on the attribution type, we either look at only one entity, + or all of them in the funnel. + if this is a multi property breakdown then the breakdown values are misleading + e.g. [Chrome, Safari], [95, 15] doesn't make clear that Chrome 15 isn't valid but Safari 15 is + so the generated list here must be [[Chrome, 95], [Safari, 15]] + """ + if self.context.breakdown: + return self.breakdown_values + + return None def _query_has_array_breakdown(self) -> bool: - breakdown, breakdown_type = self.context.breakdownFilter.breakdown, self.context.breakdownFilter.breakdown_type - return not isinstance(breakdown, str) and breakdown_type != "cohort" + breakdown, breakdownType = self.context.breakdown, self.context.breakdownType + return not isinstance(breakdown, str) and breakdownType != "cohort" def _get_exclusion_condition(self) -> List[ast.Expr]: funnelsFilter = self.context.funnelsFilter diff --git a/posthog/hogql_queries/insights/funnels/funnel.py b/posthog/hogql_queries/insights/funnels/funnel.py index a5d324a640830..adbfaaaccc991 100644 --- a/posthog/hogql_queries/insights/funnels/funnel.py +++ b/posthog/hogql_queries/insights/funnels/funnel.py @@ -33,7 +33,7 @@ class Funnel(FunnelBase): def get_query(self): max_steps = self.context.max_steps - breakdown_exprs = self._get_breakdown_expr() + breakdown_exprs = self._get_breakdown_prop_expr() select: List[ast.Expr] = [ *self._get_count_columns(max_steps), @@ -50,7 +50,7 @@ def get_query(self): def get_step_counts_query(self): max_steps = self.context.max_steps - breakdown_exprs = self._get_breakdown_expr() + breakdown_exprs = self._get_breakdown_prop_expr() 
inner_timestamps, outer_timestamps = self._get_timestamp_selects() person_and_group_properties = self._get_person_and_group_properties() @@ -104,7 +104,7 @@ def get_step_counts_without_aggregation_query(self): raise ValidationError("Funnels require at least two steps before calculating.") formatted_query = self._build_step_subquery(2, max_steps) - breakdown_exprs = self._get_breakdown_expr() + breakdown_exprs = self._get_breakdown_prop_expr() select: List[ast.Expr] = [ ast.Field(chain=["*"]), @@ -120,11 +120,13 @@ def get_step_counts_without_aggregation_query(self): ast.CompareOperation( left=ast.Field(chain=["step_0"]), right=ast.Constant(value=1), op=ast.CompareOperationOp.Eq ), - ast.CompareOperation( - left=ast.Field(chain=["exclusion"]), right=ast.Constant(value=0), op=ast.CompareOperationOp.Eq - ) - if self._get_exclusion_condition() != [] - else None, + ( + ast.CompareOperation( + left=ast.Field(chain=["exclusion"]), right=ast.Constant(value=0), op=ast.CompareOperationOp.Eq + ) + if self._get_exclusion_condition() != [] + else None + ), ] where = ast.And(exprs=[expr for expr in where_exprs if expr is not None]) @@ -142,7 +144,7 @@ def _build_step_subquery( select = [ *select, *self._get_partition_cols(1, max_steps), - *self._get_breakdown_expr(group_remaining=True), + *self._get_breakdown_prop_expr(group_remaining=True), *self._get_person_and_group_properties(), ] @@ -153,13 +155,13 @@ def _build_step_subquery( outer_select = [ *select, *self._get_partition_cols(level_index, max_steps), - *self._get_breakdown_expr(), + *self._get_breakdown_prop_expr(), *self._get_person_and_group_properties(), ] inner_select = [ *select, *self._get_comparison_cols(level_index, max_steps), - *self._get_breakdown_expr(), + *self._get_breakdown_prop_expr(), *self._get_person_and_group_properties(), ] diff --git a/posthog/hogql_queries/insights/funnels/funnel_query_context.py b/posthog/hogql_queries/insights/funnels/funnel_query_context.py index f7a12f91c3a45..db5fc7e7a17de 
100644 --- a/posthog/hogql_queries/insights/funnels/funnel_query_context.py +++ b/posthog/hogql_queries/insights/funnels/funnel_query_context.py @@ -1,11 +1,14 @@ -from typing import Optional +from typing import List, Optional, Union from posthog.hogql.constants import LimitContext from posthog.hogql.timings import HogQLTimings from posthog.hogql_queries.insights.query_context import QueryContext from posthog.models.filters.mixins.utils import cached_property +from posthog.models.property.util import box_value from posthog.models.team.team import Team from posthog.schema import ( + BreakdownAttributionType, BreakdownFilter, + BreakdownType, FunnelConversionWindowTimeUnit, FunnelsFilter, FunnelsQuery, @@ -18,6 +21,10 @@ class FunnelQueryContext(QueryContext): funnelsFilter: FunnelsFilter breakdownFilter: BreakdownFilter + breakdown: List[Union[str, int]] | None + breakdownType: BreakdownType + breakdownAttributionType: BreakdownAttributionType + funnelWindowInterval: int funnelWindowIntervalUnit: FunnelConversionWindowTimeUnit @@ -34,11 +41,46 @@ def __init__( self.funnelsFilter = self.query.funnelsFilter or FunnelsFilter() self.breakdownFilter = self.query.breakdownFilter or BreakdownFilter() + # defaults + self.breakdownType = self.breakdownFilter.breakdown_type or BreakdownType.event + self.breakdownAttributionType = ( + self.funnelsFilter.breakdownAttributionType or BreakdownAttributionType.first_touch + ) self.funnelWindowInterval = self.funnelsFilter.funnelWindowInterval or 14 self.funnelWindowIntervalUnit = ( self.funnelsFilter.funnelWindowIntervalUnit or FunnelConversionWindowTimeUnit.day ) + # the API accepts either: + # a string (single breakdown) in parameter "breakdown" + # a list of numbers (one or more cohorts) in parameter "breakdown" + # a list of strings (multiple breakdown) in parameter "breakdowns" + # if the breakdown is a string, box it as a list to reduce paths through the code + # + # The code below ensures that breakdown is always an array + 
# without it affecting the multiple areas of the code outside of funnels that use breakdown + # + # Once multi property breakdown is implemented in Trends this becomes unnecessary + + # if isinstance(self._filter.breakdowns, List) and self._filter.breakdown_type in [ + # "person", + # "event", + # "hogql", + # None, + # ]: + # data.update({"breakdown": [b.get("property") for b in self._filter.breakdowns]}) + + if isinstance(self.breakdownFilter.breakdown, str) and self.breakdownType in [ + "person", + "event", + "hogql", + None, + ]: + boxed_breakdown: List[Union[str, int]] = box_value(self.breakdownFilter.breakdown) + self.breakdown = boxed_breakdown + else: + self.breakdown = self.breakdownFilter.breakdown # type: ignore + @cached_property def max_steps(self) -> int: return len(self.query.series) diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index 18a9766da8327..77c4f901645c2 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -80,7 +80,7 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-14 23:59:59.999999', 6, 'UTC'))), in(e.event, 
tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -280,7 +280,7 @@ FROM person_overrides WHERE equals(person_overrides.team_id, 2) GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2011-12-25 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2012-01-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$autocapture', 'user signed up'))))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2011-12-25 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2012-01-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$autocapture', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -364,7 +364,7 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 
'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -467,7 +467,7 @@ WHERE equals(person.team_id, 2) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-07-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'user signed up')), or(and(ifNull(ilike(e__pdi__person.properties___email, '%.com%'), 0), ifNull(equals(e__pdi__person.properties___age, '20'), 0)), or(ifNull(ilike(e__pdi__person.properties___email, '%.org%'), 0), ifNull(equals(e__pdi__person.properties___age, '28'), 0)))))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-07-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'user signed up')), or(and(ifNull(ilike(e__pdi__person.properties___email, '%.com%'), 0), ifNull(equals(e__pdi__person.properties___age, '20'), 0)), or(ifNull(ilike(e__pdi__person.properties___email, '%.org%'), 0), ifNull(equals(e__pdi__person.properties___age, '28'), 0)))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -883,7 +883,7 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), 
and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -942,7 +942,7 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'US/Pacific')), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'US/Pacific'))), in(e.event, tuple('paid', 'user signed up'))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'US/Pacific')), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'US/Pacific'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -953,3 +953,1361 @@ allow_experimental_object_type=1 ''' # --- +# name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events + ''' + SELECT ifNull(e__group_0.properties___industry, '') AS value, + count(*) AS count + FROM events AS e + LEFT 
JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + countIf(ifNull(equals(steps, 3), 0)) AS step_3, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + median(step_2_conversion_time) AS step_2_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + 
max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY 
aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + if(has(['technology', 'finance'], prop), prop, 'Other') AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(e__group_0.properties___industry, '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, 
tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 + ''' + SELECT ifNull(e__group_0.properties___industry, '') AS value, + count(*) AS count + FROM events AS e + LEFT OUTER JOIN + (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id, + person_overrides.old_person_id AS old_person_id + FROM person_overrides + WHERE equals(person_overrides.team_id, 2) + GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- 
+# name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + countIf(ifNull(equals(steps, 3), 0)) AS step_3, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + median(step_2_conversion_time) AS step_2_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 
dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + if(has(['technology', 'finance'], prop), prop, 'Other') AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS 
step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(e__group_0.properties___industry, '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 
2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, 
latest_1), NULL) AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), 
and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.1 + ''' + SELECT 
countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([['Safari'], ['Mac'], ['Chrome']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop_0 AS prop_0, + prop_1 AS prop_1, + prop, + prop_vals AS prop_vals, + prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + 
e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + if(ifNull(equals(step_0, 1), 0), prop_basic, []) AS prop_0, + if(ifNull(equals(step_1, 1), 0), prop_basic, []) AS prop_1, + prop_1 AS prop, + groupUniqArray(prop) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))) ARRAY + JOIN prop_vals AS prop + WHERE ifNull(notEquals(prop, []), isNotNull(prop) + or isNotNull([])))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_group + ''' + 
SELECT ifNull(e__group_0.properties___industry, '') AS value, + count(*) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + countIf(ifNull(equals(steps, 3), 0)) AS step_3, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, + 
prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + median(step_2_conversion_time) AS step_2_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 
PRECEDING) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + if(has(['technology', 'finance'], prop), prop, 'Other') AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(e__group_0.properties___industry, '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE 
equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.2 + ''' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 
23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.3 + ''' + + SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + prop + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target, + prop) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + prop + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, + prop + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + prop + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 , + prop + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by 
aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'sign up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'play movie', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'buy', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps, + prop + HAVING steps = max_steps) + WHERE steps IN [1, 2, 3] + AND 
arrayFlatten(array(prop)) = arrayFlatten(array('finance')) + ORDER BY aggregation_target + LIMIT 100 + OFFSET 0 SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.4 + ''' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.5 + ''' + + SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + prop + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target, + prop) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + prop + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND 
latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, + prop + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + prop + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 , + prop + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'sign up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'play movie', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'buy', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND 
toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps, + prop + HAVING steps = max_steps) + WHERE steps IN [2, 3] + AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) + ORDER BY aggregation_target + LIMIT 100 + OFFSET 0 SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.6 + ''' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.7 + ''' + + SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) 
step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + prop + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target, + prop) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + prop + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, + prop + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + prop + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 , + prop + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'sign up', 
1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'play movie', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'buy', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps, + prop + HAVING steps = max_steps) + WHERE steps IN [1, 2, 3] + AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) + ORDER BY aggregation_target + LIMIT 100 + OFFSET 0 SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.8 + ''' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as 
count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.9 + ''' + + SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + prop + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target, + prop) as max_steps, + step_1_conversion_time, + step_2_conversion_time , + prop + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, + prop + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) 
latest_2 , + prop + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 , + prop + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'sign up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'play movie', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'buy', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == 
groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps, + prop + HAVING steps = max_steps) + WHERE steps IN [2, 3] + AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) + ORDER BY aggregation_target + LIMIT 100 + OFFSET 0 SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot.1 + ''' + SELECT 
countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([['', ''], ['alakazam', ''], ['Safari', 'xyz'], ['Mac', ''], ['Chrome', 'xyz'], ['0', '0'], ['', 'no-mac']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + if(notEmpty(arrayFilter(x -> 
notEmpty(x), prop_vals)), prop_vals, ['', '']) AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'buy'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py 
b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py new file mode 100644 index 0000000000000..39359a906656b --- /dev/null +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -0,0 +1,3056 @@ +from dataclasses import dataclass +from datetime import datetime + +from string import ascii_lowercase +from typing import Any, Callable, Dict, List, Literal, Optional, Union, cast + +from posthog.constants import INSIGHT_FUNNELS, FunnelOrderType +from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner +from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query +from posthog.models.action.action import Action + +from posthog.models.cohort import Cohort +from posthog.models.filters import Filter +from posthog.models.group.util import create_group +from posthog.models.group_type_mapping import GroupTypeMapping +from posthog.models.instance_setting import override_instance_config +from posthog.models.person.person import Person +from posthog.queries.breakdown_props import ALL_USERS_COHORT_ID +from posthog.schema import FunnelsQuery +from posthog.test.base import ( + APIBaseTest, + also_test_with_materialized_columns, + also_test_with_person_on_events_v2, + snapshot_clickhouse_queries, +) +from posthog.test.test_journeys import journeys_for + + +@dataclass(frozen=True) +class FunnelStepResult: + name: str + count: int + breakdown: Union[List[str], str] + average_conversion_time: Optional[float] = None + median_conversion_time: Optional[float] = None + type: Literal["events", "actions"] = "events" + action_id: Optional[str] = None + + +def funnel_breakdown_test_factory( + funnel_order_type: FunnelOrderType, + FunnelPerson, + _create_action: Callable[..., Action], + _create_person: Callable[..., Person], +): + class TestFunnelBreakdown(APIBaseTest): + def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None): + filter = Filter(data=filter, team=self.team) + person_filter 
= filter.shallow_clone({"funnel_step": funnel_step, "funnel_step_breakdown": breakdown_value}) + _, serialized_result, _ = FunnelPerson(person_filter, self.team).get_actors() + + return [val["id"] for val in serialized_result] + + def _create_groups(self): + GroupTypeMapping.objects.create(team=self.team, group_type="organization", group_type_index=0) + GroupTypeMapping.objects.create(team=self.team, group_type="company", group_type_index=1) + + create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="org:5", + properties={"industry": "finance"}, + ) + create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="org:6", + properties={"industry": "technology"}, + ) + create_group( + team_id=self.team.pk, + group_type_index=1, + group_key="org:5", + properties={"industry": "random"}, + ) + + def _assert_funnel_breakdown_result_is_correct(self, result, steps: List[FunnelStepResult]): + def funnel_result(step: FunnelStepResult, order: int) -> Dict[str, Any]: + return { + "action_id": step.name if step.type == "events" else step.action_id, + "name": step.name, + "custom_name": None, + "order": order, + "people": [], + "count": step.count, + "type": step.type, + "average_conversion_time": step.average_conversion_time, + "median_conversion_time": step.median_conversion_time, + "breakdown": step.breakdown, + "breakdown_value": step.breakdown, + **( + { + "action_id": None, + "name": f"Completed {order+1} step{'s' if order > 0 else ''}", + } + if funnel_order_type == FunnelOrderType.UNORDERED + else {} + ), + } + + step_results = [] + for index, step_result in enumerate(steps): + step_results.append(funnel_result(step_result, index)) + + assert_funnel_results_equal(result, step_results) + + @also_test_with_materialized_columns(["$browser", "$browser_version"]) + def test_funnel_step_multi_property_breakdown_event(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [ + {"id": "sign up", "order": 
0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser", "$browser_version"], + } + + journey = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": { + "key": "val", + "$browser": "Chrome", + "$browser_version": 95, + }, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": { + "key": "val", + "$browser": "Chrome", + "$browser_version": 95, + }, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": { + "key": "val", + "$browser": "Chrome", + "$browser_version": 95, + }, + }, + ], + "person2": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": { + "key": "val", + "$browser": "Safari", + "$browser_version": 15, + }, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 16), + "properties": { + "key": "val", + "$browser": "Safari", + "$browser_version": 15, + }, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": { + "key": "val", + "$browser": "Safari", + "$browser_version": 14, + }, + } + ], + } + + people = journeys_for(events_by_person=journey, team=self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=["Safari", "14"], count=1), + FunnelStepResult(name="play movie", breakdown=["Safari", "14"], count=0), + FunnelStepResult(name="buy", breakdown=["Safari", "14"], count=0), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["Safari", "14"]), + [people["person3"].uuid], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, ["Safari", "14"]), []) + + 
self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["Safari", "15"], count=1), + FunnelStepResult( + name="play movie", + breakdown=["Safari", "15"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + FunnelStepResult(name="buy", breakdown=["Safari", "15"], count=0), + ], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["Safari", "15"]), + [people["person2"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, ["Safari", "15"]), + [people["person2"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[2], + [ + FunnelStepResult(name="sign up", breakdown=["Chrome", "95"], count=1), + FunnelStepResult( + name="play movie", + breakdown=["Chrome", "95"], + count=1, + average_conversion_time=3600.0, + median_conversion_time=3600.0, + ), + FunnelStepResult( + name="buy", + breakdown=["Chrome", "95"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + ], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["Chrome", "95"]), + [people["person1"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, ["Chrome", "95"]), + [people["person1"].uuid], + ) + + @also_test_with_materialized_columns(["$browser"]) + def test_funnel_step_breakdown_event_with_string_only_breakdown(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": "$browser", + } + + journey = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"key": "val", "$browser": "Chrome"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), 
+ "properties": {"key": "val", "$browser": "Chrome"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"key": "val", "$browser": "Chrome"}, + }, + ], + "person2": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"key": "val", "$browser": "Safari"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"key": "val", "$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"key": "val", "$browser": "Safari"}, + } + ], + } + + people = journeys_for(events_by_person=journey, team=self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=["Chrome"], count=1), + FunnelStepResult( + name="play movie", + breakdown=["Chrome"], + count=1, + average_conversion_time=3600.0, + median_conversion_time=3600.0, + ), + FunnelStepResult( + name="buy", + breakdown=["Chrome"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + ], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Chrome"), + [people["person1"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Chrome"), + [people["person1"].uuid], + ) + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["Safari"], count=2), + FunnelStepResult( + name="play movie", + breakdown=["Safari"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + FunnelStepResult(name="buy", breakdown=["Safari"], count=0), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Safari"), + [people["person2"].uuid, people["person3"].uuid], + ) + self.assertCountEqual( + 
self._get_actor_ids_at_step(filters, 2, "Safari"), + [people["person2"].uuid], + ) + + @also_test_with_materialized_columns(["$browser"]) + def test_funnel_step_breakdown_event(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + } + + journey = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"key": "val", "$browser": "Chrome"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"key": "val", "$browser": "Chrome"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"key": "val", "$browser": "Chrome"}, + }, + ], + "person2": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"key": "val", "$browser": "Safari"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"key": "val", "$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"key": "val", "$browser": "Safari"}, + } + ], + } + + people = journeys_for(events_by_person=journey, team=self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=["Chrome"], count=1), + FunnelStepResult( + name="play movie", + breakdown=["Chrome"], + count=1, + average_conversion_time=3600.0, + median_conversion_time=3600.0, + ), + FunnelStepResult( + name="buy", + breakdown=["Chrome"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + 
), + ], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Chrome"), + [people["person1"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Chrome"), + [people["person1"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["Safari"], count=2), + FunnelStepResult( + name="play movie", + breakdown=["Safari"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + FunnelStepResult(name="buy", breakdown=["Safari"], count=0), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Safari"), + [people["person2"].uuid, people["person3"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Safari"), + [people["person2"].uuid], + ) + + @also_test_with_materialized_columns(["$browser"]) + def test_funnel_step_breakdown_event_with_other(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_limit": 1, + } + + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"$browser": "Chrome"}, + }, + ], + "person2": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Safari"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + 
"timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Safari"}, + } + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "random"}, + } + ], + "person5": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": "another one"}, + } + ], + } + + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sort_breakdown_funnel_results(results) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["Safari"], count=2), + FunnelStepResult( + name="play movie", + breakdown=["Safari"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + FunnelStepResult(name="buy", breakdown=["Safari"], count=0), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Safari"), + [people["person2"].uuid, people["person3"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Safari"), + [people["person2"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=["Other"], count=3), + FunnelStepResult( + name="play movie", + breakdown=["Other"], + count=1, + average_conversion_time=3600.0, + median_conversion_time=3600.0, + ), + FunnelStepResult( + name="buy", + breakdown=["Other"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Other"), + [ + people["person1"].uuid, + people["person4"].uuid, + people["person5"].uuid, + ], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Other"), + [people["person1"].uuid], + ) + + @also_test_with_materialized_columns(["$browser"]) + def 
test_funnel_step_breakdown_event_no_type(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown": ["$browser"], + } + + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"$browser": "Chrome"}, + }, + ], + "person2": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Safari"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Safari"}, + } + ], + } + + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=["Chrome"], count=1), + FunnelStepResult( + name="play movie", + breakdown=["Chrome"], + count=1, + average_conversion_time=3600.0, + median_conversion_time=3600.0, + ), + FunnelStepResult( + name="buy", + breakdown=["Chrome"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Chrome"), + [people["person1"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Chrome"), + [people["person1"].uuid], + ) + + 
self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["Safari"], count=2), + FunnelStepResult( + name="play movie", + breakdown=["Safari"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + FunnelStepResult(name="buy", breakdown=["Safari"], count=0), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Safari"), + [people["person2"].uuid, people["person3"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Safari"), + [people["person2"].uuid], + ) + + @also_test_with_materialized_columns(person_properties=["$browser"]) + def test_funnel_step_breakdown_person(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "person", + "breakdown": ["$browser"], + } + + person1 = _create_person( + distinct_ids=["person1"], + team_id=self.team.pk, + properties={"$browser": "Chrome"}, + ) + person2 = _create_person( + distinct_ids=["person2"], + team_id=self.team.pk, + properties={"$browser": "Safari"}, + ) + + peoples_journeys = { + "person1": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12)}, + {"event": "play movie", "timestamp": datetime(2020, 1, 1, 13)}, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 15)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 2, 14)}, + {"event": "play movie", "timestamp": datetime(2020, 1, 2, 16)}, + ], + } + journeys_for(peoples_journeys, self.team, create_people=False) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + 
FunnelStepResult(name="sign up", breakdown=["Chrome"], count=1), + FunnelStepResult( + name="play movie", + breakdown=["Chrome"], + count=1, + average_conversion_time=3600.0, + median_conversion_time=3600.0, + ), + FunnelStepResult( + name="buy", + breakdown=["Chrome"], + count=1, + average_conversion_time=7200, + median_conversion_time=7200, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Chrome"), [person1.uuid]) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, "Chrome"), [person1.uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["Safari"], count=1), + FunnelStepResult( + name="play movie", + breakdown=["Safari"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + FunnelStepResult(name="buy", breakdown=["Safari"], count=0), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Safari"), [person2.uuid]) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 3, "Safari"), []) + + @also_test_with_materialized_columns(["some_breakdown_val"]) + def test_funnel_step_breakdown_limit(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["some_breakdown_val"], + "breakdown_limit": 5, + } + + events_by_person = {} + for num in range(10): + for i in range(num): + person_id = f"person_{num}_{i}" + events_by_person[person_id] = [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"some_breakdown_val": str(num)}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"some_breakdown_val": str(num)}, + }, + { + "event": "buy", + "timestamp": 
datetime(2020, 1, 1, 15), + "properties": {"some_breakdown_val": str(num)}, + }, + ] + journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + # assert that we give 5 at a time at most and that those values are the most popular ones + breakdown_vals = sorted([res[0]["breakdown"] for res in results]) + self.assertEqual([["5"], ["6"], ["7"], ["8"], ["9"], ["Other"]], breakdown_vals) + + @also_test_with_materialized_columns(["some_breakdown_val"]) + def test_funnel_step_custom_breakdown_limit_with_nulls(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown_limit": 3, + "breakdown": ["some_breakdown_val"], + } + + events_by_person = {} + for num in range(5): + for i in range(num): + person_id = f"person_{num}_{i}" + events_by_person[person_id] = [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"some_breakdown_val": str(num)}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"some_breakdown_val": str(num)}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"some_breakdown_val": str(num)}, + }, + ] + + # no breakdown value for this guy + events_by_person["person_null"] = [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12)}, + {"event": "play movie", "timestamp": datetime(2020, 1, 1, 13)}, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 15)}, + ] + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + 
breakdown_vals = sorted([res[0]["breakdown"] for res in results]) + self.assertEqual([["2"], ["3"], ["4"], ["Other"]], breakdown_vals) + # skipped 1 and '' because the limit was 3. + self.assertTrue(people["person_null"].uuid in self._get_actor_ids_at_step(filters, 1, "Other")) + + @also_test_with_materialized_columns(["some_breakdown_val"]) + def test_funnel_step_custom_breakdown_limit_with_nulls_included(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown_limit": 6, + "breakdown": ["some_breakdown_val"], + } + + events_by_person = {} + for num in range(5): + for i in range(num): + person_id = f"person_{num}_{i}" + events_by_person[person_id] = [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"some_breakdown_val": str(num)}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"some_breakdown_val": str(num)}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"some_breakdown_val": str(num)}, + }, + ] + + # no breakdown value for this guy + events_by_person["person_null"] = [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12)}, + {"event": "play movie", "timestamp": datetime(2020, 1, 1, 13)}, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 15)}, + ] + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + breakdown_vals = sorted([res[0]["breakdown"] for res in results]) + self.assertEqual([[""], ["1"], ["2"], ["3"], ["4"]], breakdown_vals) + # included 1 and '' because the limit was 6. 
+ + for i in range(1, 5): + self.assertEqual(len(self._get_actor_ids_at_step(filters, 3, str(i))), i) + + self.assertEqual([people["person_null"].uuid], self._get_actor_ids_at_step(filters, 1, "")) + self.assertEqual([people["person_null"].uuid], self._get_actor_ids_at_step(filters, 3, "")) + + @also_test_with_materialized_columns(["$browser"]) + def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [{"id": "sign up", "order": 0}, {"id": "other event", "order": 0}], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_attribution_type": "step", + "breakdown_attribution_value": "0", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"$browser": "Safari"}, + }, + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + # mixed property type! 
+ { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0}, + }, + ] + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=["0"], count=1), + FunnelStepResult(name="other event", breakdown=["0"], count=0), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person1"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), + FunnelStepResult(name="other event", breakdown=["Chrome"], count=0), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Chrome"), + [people["person1"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[2], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Mac"]), + FunnelStepResult(name="other event", breakdown=["Mac"], count=0), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Mac"), [people["person1"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[3], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Safari"]), + FunnelStepResult(name="other event", breakdown=["Safari"], count=0), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Safari"), + [people["person1"].uuid], + ) + + def test_funnel_step_breakdown_event_single_person_events_with_multiple_properties(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + ], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + 
"breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_attribution_type": "all_events", + } + + people = journeys_for( + { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 12, 30), + "properties": {"$browser": "Safari"}, + }, + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Safari"}, + }, + ] + }, + self.team, + ) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(len(results), 2) + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), + FunnelStepResult(name="play movie", count=0, breakdown=["Chrome"]), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Chrome"), + [people["person1"].uuid], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, "Chrome"), []) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Safari"]), + FunnelStepResult( + name="play movie", + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + breakdown=["Safari"], + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Safari"), + [people["person1"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Safari"), + [people["person1"].uuid], + ) + + @also_test_with_materialized_columns(person_properties=["key"], verify_no_jsonextract=False) + def test_funnel_cohort_breakdown(self): + # This caused some issues with SQL parsing + _create_person( + distinct_ids=[f"person1"], + team_id=self.team.pk, + 
properties={"key": "value"}, + ) + people = journeys_for( + {"person1": [{"event": "sign up", "timestamp": datetime(2020, 1, 2, 12)}]}, + self.team, + create_people=False, + ) + + cohort = Cohort.objects.create( + team=self.team, + name="test_cohort", + groups=[{"properties": [{"key": "key", "value": "value", "type": "person"}]}], + ) + cohort.calculate_people_ch(pending_version=0) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "cohort", + "breakdown": ["all", cohort.pk], + "breakdown_attribution_type": "step", + "breakdown_attribution_value": 0, + # first touch means same user can't be in 'all' and the other cohort both + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(len(results[0]), 3) + self.assertEqual(results[0][0]["breakdown"], "all users") + self.assertEqual(len(results[1]), 3) + self.assertEqual(results[1][0]["breakdown"], "test_cohort") + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, cohort.pk), + [people["person1"].uuid], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, cohort.pk), []) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ALL_USERS_COHORT_ID), + [people["person1"].uuid], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, ALL_USERS_COHORT_ID), []) + + # non array + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "cohort", + "breakdown": cohort.pk, + } + + query = cast(FunnelsQuery, 
filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(len(results[0]), 3) + self.assertEqual(results[0][0]["breakdown"], "test_cohort") + self.assertEqual(results[0][0]["breakdown_value"], cohort.pk) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, cohort.pk), + [people["person1"].uuid], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, cohort.pk), []) + + def test_basic_funnel_default_funnel_days_breakdown_event(self): + events_by_person = { + "user_1": [ + { + "event": "user signed up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$current_url": "https://posthog.com/docs/x"}, + }, + { + "event": "paid", + "timestamp": datetime(2020, 1, 10, 14), + "properties": {"$current_url": "https://posthog.com/docs/x"}, + }, + ] + } + # Dummy events to make sure that breakdown is not confused + # It was confused before due to the nature of fetching breakdown values with a LIMIT based on value popularity + # See https://github.com/PostHog/posthog/pull/5496 + for current_url_letter in ascii_lowercase[:20]: + # Twenty dummy breakdown values + for _ in range(2): + # Each twice, so that the breakdown values from dummy events rank higher in raw order + # This test makes sure that events are prefiltered properly to avoid problems with this raw order + events_by_person["user_1"].append( + { + "event": "user signed up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$current_url": f"https://posthog.com/blog/{current_url_letter}"}, + } + ) + + journeys_for(events_by_person, self.team) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [ + { + "id": "user signed up", + "type": "events", + "order": 0, + "properties": [ + { + "key": "$current_url", + "operator": "icontains", + "type": "event", + "value": "https://posthog.com/docs", + } + ], + }, + {"id": "paid", "type": "events", "order": 1}, + ], + 
"date_from": "2020-01-01", + "date_to": "2020-01-14", + "breakdown": ["$current_url"], + "breakdown_type": "event", + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult( + name="user signed up", + count=1, + breakdown=["https://posthog.com/docs/x"], + ), + FunnelStepResult( + name="paid", + count=1, + average_conversion_time=691200.0, + median_conversion_time=691200.0, + breakdown=["https://posthog.com/docs/x"], + ), + ], + ) + + @also_test_with_materialized_columns(["$current_url"]) + def test_basic_funnel_default_funnel_days_breakdown_action(self): + # Same case as test_basic_funnel_default_funnel_days_breakdown_event but with an action + user_signed_up_action = _create_action(name="user signed up", event="user signed up", team=self.team) + + events_by_person = { + "user_1": [ + { + "event": "user signed up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$current_url": "https://posthog.com/docs/x"}, + }, + { + "event": "paid", + "timestamp": datetime(2020, 1, 10, 14), + "properties": {"$current_url": "https://posthog.com/docs/x"}, + }, + ] + } + for current_url_letter in ascii_lowercase[:20]: + for _ in range(2): + events_by_person["user_1"].append( + { + "event": "user signed up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$current_url": f"https://posthog.com/blog/{current_url_letter}"}, + } + ) + + journeys_for(events_by_person, self.team) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "actions": [ + { + "id": user_signed_up_action.id, + "order": 0, + "properties": [ + { + "key": "$current_url", + "operator": "icontains", + "type": "event", + "value": "https://posthog.com/docs", + } + ], + } + ], + "events": [{"id": "paid", "type": "events", "order": 1}], + "date_from": "2020-01-01", + "date_to": "2020-01-14", + 
"breakdown": ["$current_url"], + "breakdown_type": "event", + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult( + name="user signed up", + count=1, + breakdown=["https://posthog.com/docs/x"], + type="actions", + action_id=user_signed_up_action.id, # type: ignore + ), + FunnelStepResult( + name="paid", + count=1, + average_conversion_time=691200.0, + median_conversion_time=691200.0, + breakdown=["https://posthog.com/docs/x"], + ), + ], + ) + + def test_funnel_step_breakdown_with_first_touch_attribution(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_attribution_type": "first_touch", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0}, + }, + # first touch means alakazam is disregarded + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "alakazam"}, + }, + ], + # no properties dude, represented by '' + "person5": [ + {"event": 
"sign up", "timestamp": datetime(2020, 1, 2, 15)}, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + ], + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + self.assertEqual(len(results), 5) + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=[""], count=1), + FunnelStepResult( + name="buy", + breakdown=[""], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ""), [people["person5"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["0"], count=1), + FunnelStepResult( + name="buy", + breakdown=["0"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person4"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[2], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), + FunnelStepResult( + name="buy", + breakdown=["Chrome"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Chrome"), + [people["person1"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[3], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Mac"]), + FunnelStepResult( + name="buy", + breakdown=["Mac"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Mac"), [people["person3"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[4], + [ + 
def test_funnel_step_breakdown_with_last_touch_attribution(self):
    """Verify ``$browser`` breakdown buckets under ``last_touch`` attribution.

    Five people each complete "sign up" then "buy". The assertions expect
    one bucket per person, keyed by the last ``$browser`` value carried by
    that person's events, and ``""`` for the person whose events carry no
    properties at all.
    """
    filters = {
        "insight": INSIGHT_FUNNELS,
        "funnel_order_type": funnel_order_type,
        "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}],
        "date_from": "2020-01-01",
        "date_to": "2020-01-08",
        "funnel_window_days": 7,
        "breakdown_type": "event",
        "breakdown": ["$browser"],
        "breakdown_attribution_type": "last_touch",
    }

    journeys = {
        "person1": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12), "properties": {"$browser": "Chrome"}},
            {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)},
        ],
        "person2": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)},
            {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}},
        ],
        "person3": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 2, 14), "properties": {"$browser": "Mac"}},
            {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)},
        ],
        "person4": [
            # the earlier value 0 is superseded by the later "Alakazam"
            {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15), "properties": {"$browser": 0}},
            {"event": "buy", "timestamp": datetime(2020, 1, 2, 16), "properties": {"$browser": "Alakazam"}},
        ],
        # no properties on either event: expected in the '' bucket
        "person5": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)},
            {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)},
        ],
    }
    created = journeys_for(journeys, self.team)

    runner = FunnelsQueryRunner(query=cast(FunnelsQuery, filter_to_query(filters)), team=self.team)
    ordered = sorted(runner.calculate().results, key=lambda steps: steps[0]["breakdown"])

    # (breakdown value, converting person, seconds from "sign up" to "buy"),
    # listed in the sorted order of the breakdown values.
    expectations = [
        ("", "person5", 3600),
        ("Alakazam", "person4", 3600),
        ("Chrome", "person1", 3600),
        ("Mac", "person3", 3600),
        ("Safari", "person2", 86400),
    ]

    self.assertEqual(len(ordered), 5)

    for steps, (browser, person_key, seconds) in zip(ordered, expectations):
        self._assert_funnel_breakdown_result_is_correct(
            steps,
            [
                FunnelStepResult(name="sign up", breakdown=[browser], count=1),
                FunnelStepResult(
                    name="buy",
                    breakdown=[browser],
                    count=1,
                    average_conversion_time=seconds,
                    median_conversion_time=seconds,
                ),
            ],
        )
        self.assertCountEqual(
            self._get_actor_ids_at_step(filters, 1, browser),
            [created[person_key].uuid],
        )
def test_funnel_step_breakdown_with_step_one_attribution(self):
    """Verify ``$browser`` buckets when attribution is pinned to step index 1.

    With ``breakdown_attribution_type="step"`` and
    ``breakdown_attribution_value="1"``, the expected buckets follow the
    ``$browser`` set on each person's "buy" event: two people with no value
    there share ``""``, the others land in "Safari" and "alakazam".
    """
    filters = {
        "insight": INSIGHT_FUNNELS,
        "funnel_order_type": funnel_order_type,
        "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}],
        "date_from": "2020-01-01",
        "date_to": "2020-01-08",
        "funnel_window_days": 7,
        "breakdown_type": "event",
        "breakdown": ["$browser"],
        "breakdown_attribution_type": "step",
        "breakdown_attribution_value": "1",
    }

    journeys = {
        "person1": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12), "properties": {"$browser": "Chrome"}},
            {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)},
        ],
        "person2": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)},
            {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}},
        ],
        "person3": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 2, 14), "properties": {"$browser": "Mac"}},
            {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)},
        ],
        "person4": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15), "properties": {"$browser": 0}},
            # step attribution means alakazam is valid when step = 1
            {"event": "buy", "timestamp": datetime(2020, 1, 2, 16), "properties": {"$browser": "alakazam"}},
        ],
    }
    created = journeys_for(journeys, self.team)

    runner = FunnelsQueryRunner(query=cast(FunnelsQuery, filter_to_query(filters)), team=self.team)
    ordered = sorted(runner.calculate().results, key=lambda steps: steps[0]["breakdown"])

    self.assertEqual(len(ordered), 3)

    # Chrome and Mac go away, Safari comes back.
    # (breakdown value, people per step, conversion seconds, converting persons)
    expectations = [
        ("", 2, 3600, ["person1", "person3"]),
        ("Safari", 1, 86400, ["person2"]),
        ("alakazam", 1, 3600, ["person4"]),
    ]

    for steps, (browser, people_count, seconds, person_keys) in zip(ordered, expectations):
        self._assert_funnel_breakdown_result_is_correct(
            steps,
            [
                FunnelStepResult(name="sign up", breakdown=[browser], count=people_count),
                FunnelStepResult(
                    name="buy",
                    breakdown=[browser],
                    count=people_count,
                    average_conversion_time=seconds,
                    median_conversion_time=seconds,
                ),
            ],
        )
        self.assertCountEqual(
            self._get_actor_ids_at_step(filters, 1, browser),
            [created[key].uuid for key in person_keys],
        )
self.assertEqual(len(results), 5) + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=["", ""], count=1), + FunnelStepResult( + name="buy", + breakdown=["", ""], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["", ""]), + [people["person5"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["0", "0"], count=1), + FunnelStepResult( + name="buy", + breakdown=["0", "0"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["0", "0"]), + [people["person4"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[2], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Chrome", "xyz"]), + FunnelStepResult( + name="buy", + breakdown=["Chrome", "xyz"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["Chrome", "xyz"]), + [people["person1"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[3], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Mac", ""]), + FunnelStepResult( + name="buy", + breakdown=["Mac", ""], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["Mac", ""]), + [people["person3"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[4], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Safari", "xyz"]), + FunnelStepResult( + name="buy", + breakdown=["Safari", "xyz"], + count=1, + average_conversion_time=86400, + median_conversion_time=86400, + ), + ], + ) + + self.assertCountEqual( + 
def test_funnel_step_multiple_breakdown_with_first_touch_attribution_incomplete_funnel(self):
    """Verify a two-property breakdown when some people never reach "buy".

    The breakdown is ``["$browser", "$version"]`` with ``first_touch``
    attribution. person3 and person4 only sign up, so their buckets are
    expected to show a "buy" count of 0 with no conversion times.
    """
    filters = {
        "insight": INSIGHT_FUNNELS,
        "funnel_order_type": funnel_order_type,
        "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}],
        "date_from": "2020-01-01",
        "date_to": "2020-01-08",
        "funnel_window_days": 7,
        "breakdown_type": "event",
        "breakdown": ["$browser", "$version"],
        "breakdown_attribution_type": "first_touch",
    }

    journeys = {
        "person1": [
            {
                "event": "sign up",
                "timestamp": datetime(2020, 1, 1, 12),
                "properties": {"$browser": "Chrome", "$version": "xyz"},
            },
            {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)},
        ],
        "person2": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)},
            {
                "event": "buy",
                "timestamp": datetime(2020, 1, 2, 13),
                "properties": {"$browser": "Safari", "$version": "xyz"},
            },
        ],
        # person3 deliberately has no "buy" event
        "person3": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 2, 14), "properties": {"$browser": "Mac"}},
        ],
        # person4 deliberately has no "buy" event
        "person4": [
            {
                "event": "sign up",
                "timestamp": datetime(2020, 1, 2, 15),
                "properties": {"$browser": 0, "$version": 0},
            },
        ],
        # no properties dude, represented by ''
        "person5": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)},
            {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)},
        ],
    }
    created = journeys_for(journeys, self.team)

    runner = FunnelsQueryRunner(query=cast(FunnelsQuery, filter_to_query(filters)), team=self.team)
    ordered = sorted(runner.calculate().results, key=lambda steps: steps[0]["breakdown"])

    self.assertEqual(len(ordered), 5)

    # (breakdown values, conversion seconds or None when "buy" is never reached,
    #  person at step 1, expected person keys at step 2 or None to skip that check)
    expectations = [
        (("", ""), 3600, "person5", None),
        (("0", "0"), None, "person4", None),
        (("Chrome", "xyz"), 3600, "person1", None),
        (("Mac", ""), None, "person3", []),
        (("Safari", "xyz"), 86400, "person2", None),
    ]

    for steps, (values, seconds, person_key, step_two_keys) in zip(ordered, expectations):
        if seconds is None:
            buy_step = FunnelStepResult(name="buy", breakdown=list(values), count=0)
        else:
            buy_step = FunnelStepResult(
                name="buy",
                breakdown=list(values),
                count=1,
                average_conversion_time=seconds,
                median_conversion_time=seconds,
            )

        self._assert_funnel_breakdown_result_is_correct(
            steps,
            [FunnelStepResult(name="sign up", breakdown=list(values), count=1), buy_step],
        )
        self.assertCountEqual(
            self._get_actor_ids_at_step(filters, 1, list(values)),
            [created[person_key].uuid],
        )

        if step_two_keys is not None:
            self.assertCountEqual(
                self._get_actor_ids_at_step(filters, 2, list(values)),
                [created[key].uuid for key in step_two_keys],
            )
test_funnel_step_breakdown_with_step_one_attribution_incomplete_funnel(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_attribution_type": "step", + "breakdown_attribution_value": "1", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}} + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)} + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0}, + }, + # step attribution means alakazam is valid when step = 1 + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "alakazam"}, + }, + ], + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + # Breakdown by step_1 means funnel items that never reach step_1 are NULLed out + self.assertEqual(len(results), 2) + # Chrome and Mac and Safari goes away + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=[""], count=1), + FunnelStepResult( + name="buy", + breakdown=[""], + count=1, + average_conversion_time=3600, + 
def test_funnel_step_non_array_breakdown_with_step_one_attribution_incomplete_funnel(self):
    """Verify step-1 attribution when ``breakdown`` is a plain string.

    ``breakdown`` is passed as ``"$browser"`` rather than a list, yet the
    expected results still report the breakdown as a one-element list.
    Only the two people who reach "buy" are expected to appear.
    """
    filters = {
        "insight": INSIGHT_FUNNELS,
        "funnel_order_type": funnel_order_type,
        "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}],
        "date_from": "2020-01-01",
        "date_to": "2020-01-08",
        "funnel_window_days": 7,
        "breakdown_type": "event",
        "breakdown": "$browser",
        "breakdown_attribution_type": "step",
        "breakdown_attribution_value": "1",
    }

    journeys = {
        "person1": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12), "properties": {"$browser": "Chrome"}},
            {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)},
        ],
        # person2 deliberately has no "buy" event
        "person2": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)},
        ],
        # person3 deliberately has no "buy" event
        "person3": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 2, 14), "properties": {"$browser": "Mac"}},
        ],
        "person4": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15), "properties": {"$browser": 0}},
            # step attribution means alakazam is valid when step = 1
            {"event": "buy", "timestamp": datetime(2020, 1, 2, 16), "properties": {"$browser": "alakazam"}},
        ],
    }
    created = journeys_for(journeys, self.team)

    runner = FunnelsQueryRunner(query=cast(FunnelsQuery, filter_to_query(filters)), team=self.team)
    ordered = sorted(runner.calculate().results, key=lambda steps: steps[0]["breakdown"])

    # Breakdown by step_1 means funnel items that never reach step_1 are NULLed out,
    # so Chrome, Mac and Safari all go away.
    self.assertEqual(len(ordered), 2)

    # (breakdown value, converting person), in sorted breakdown order
    expectations = [("", "person1"), ("alakazam", "person4")]

    for steps, (browser, person_key) in zip(ordered, expectations):
        self._assert_funnel_breakdown_result_is_correct(
            steps,
            [
                FunnelStepResult(name="sign up", breakdown=[browser], count=1),
                FunnelStepResult(
                    name="buy",
                    breakdown=[browser],
                    count=1,
                    average_conversion_time=3600,
                    median_conversion_time=3600,
                ),
            ],
        )
        self.assertCountEqual(
            self._get_actor_ids_at_step(filters, 1, browser),
            [created[person_key].uuid],
        )
@snapshot_clickhouse_queries
def test_funnel_breakdown_correct_breakdown_props_are_chosen(self):
    """Verify first-touch buckets when the "buy" step has a property filter.

    The "buy" step only matches events with ``$version == "xyz"``. With
    ``first_touch`` attribution on ``$browser`` the test expects four
    buckets: "Mac", "Chrome", "Safari" and ``""``.
    """
    # No person querying here, so snapshots are more legible
    buy_step_filter = [{"type": "event", "key": "$version", "value": "xyz"}]
    filters = {
        "insight": INSIGHT_FUNNELS,
        "funnel_order_type": funnel_order_type,
        "events": [
            {"id": "sign up", "order": 0},
            {"id": "buy", "properties": buy_step_filter, "order": 1},
        ],
        "date_from": "2020-01-01",
        "date_to": "2020-01-08",
        "funnel_window_days": 7,
        "breakdown_type": "event",
        "breakdown": "$browser",
        "breakdown_attribution_type": "first_touch",
    }

    journeys = {
        "person1": [
            {
                "event": "sign up",
                "timestamp": datetime(2020, 1, 1, 12),
                "properties": {"$browser": "Chrome", "$version": "xyz"},
            },
            # discarded at step 1 because doesn't meet criteria
            {"event": "buy", "timestamp": datetime(2020, 1, 1, 13), "properties": {"$browser": "Chrome"}},
        ],
        "person2": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)},
            {
                "event": "buy",
                "timestamp": datetime(2020, 1, 2, 13),
                "properties": {"$browser": "Safari", "$version": "xyz"},
            },
        ],
        "person3": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 2, 14), "properties": {"$browser": "Mac"}},
            {
                "event": "buy",
                "timestamp": datetime(2020, 1, 2, 15),
                "properties": {"$version": "xyz", "$browser": "Mac"},
            },
        ],
        # no properties dude, represented by '', who finished step 0
        "person5": [
            {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)},
            {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)},
        ],
    }
    journeys_for(journeys, self.team)

    runner = FunnelsQueryRunner(query=cast(FunnelsQuery, filter_to_query(filters)), team=self.team)
    ordered = sorted(runner.calculate().results, key=lambda steps: steps[0]["breakdown"])

    self.assertEqual(len(ordered), 4)

    seen_breakdowns = [steps[0]["breakdown"] for steps in ordered]
    self.assertCountEqual(seen_breakdowns, [["Mac"], ["Chrome"], ["Safari"], [""]])
@snapshot_clickhouse_queries
def test_funnel_breakdown_group(self):
    """Verify a three-step funnel broken down by the group property ``industry``.

    People are the aggregation unit; only the breakdown uses group type 0.
    The expectations are a fully converting "finance" bucket and a
    "technology" bucket with two sign-ups, one "play movie" and no "buy".
    NOTE(review): which org maps to which industry is presumably set in
    ``_create_groups`` -- confirm there.
    """
    self._create_groups()

    def group_event(name, when, org, browser):
        # Every event in this test carries the same two properties, in this order.
        return {"event": name, "timestamp": when, "properties": {"$group_0": org, "$browser": browser}}

    created = journeys_for(
        {
            "person1": [
                group_event("sign up", datetime(2020, 1, 1, 12), "org:5", "Chrome"),
                group_event("play movie", datetime(2020, 1, 1, 13), "org:5", "Chrome"),
                group_event("buy", datetime(2020, 1, 1, 15), "org:5", "Chrome"),
            ],
            "person2": [
                group_event("sign up", datetime(2020, 1, 2, 14), "org:6", "Safari"),
                group_event("play movie", datetime(2020, 1, 2, 16), "org:6", "Safari"),
            ],
            "person3": [
                group_event("sign up", datetime(2020, 1, 2, 14), "org:6", "Safari"),
            ],
        },
        self.team,
    )

    filters = {
        "events": [
            {"id": "sign up", "order": 0},
            {"id": "play movie", "order": 1},
            {"id": "buy", "order": 2},
        ],
        "insight": INSIGHT_FUNNELS,
        "date_from": "2020-01-01",
        "date_to": "2020-01-08",
        "funnel_window_days": 7,
        "breakdown": "industry",
        "breakdown_type": "group",
        "breakdown_group_type_index": 0,
    }

    runner = FunnelsQueryRunner(query=cast(FunnelsQuery, filter_to_query(filters)), team=self.team)
    results = runner.calculate().results

    finance_steps = [
        FunnelStepResult(name="sign up", breakdown="finance", count=1),
        FunnelStepResult(
            name="play movie",
            breakdown="finance",
            count=1,
            average_conversion_time=3600.0,
            median_conversion_time=3600.0,
        ),
        FunnelStepResult(
            name="buy",
            breakdown="finance",
            count=1,
            average_conversion_time=7200.0,
            median_conversion_time=7200.0,
        ),
    ]
    technology_steps = [
        FunnelStepResult(name="sign up", breakdown="technology", count=2),
        FunnelStepResult(
            name="play movie",
            breakdown="technology",
            count=1,
            average_conversion_time=7200.0,
            median_conversion_time=7200.0,
        ),
        FunnelStepResult(name="buy", breakdown="technology", count=0),
    ]

    self._assert_funnel_breakdown_result_is_correct(results[0], finance_steps)

    # Querying persons when aggregating by persons should be ok, despite group breakdown
    first_person = [created["person1"].uuid]
    self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "finance"), first_person)
    self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, "finance"), first_person)

    self._assert_funnel_breakdown_result_is_correct(results[1], technology_steps)

    self.assertCountEqual(
        self._get_actor_ids_at_step(filters, 1, "technology"),
        [created["person2"].uuid, created["person3"].uuid],
    )
    self.assertCountEqual(
        self._get_actor_ids_at_step(filters, 2, "technology"),
        [created["person2"].uuid],
    )
# TODO: Delete this test when moved to person-on-events
@also_test_with_person_on_events_v2
def test_funnel_aggregate_by_groups_breakdown_group(self):
    """Verify a group-aggregated funnel broken down by the group property ``industry``.

    Both aggregation and breakdown use group type 0. The "technology"
    bucket is expected to count one actor through all three steps, even
    though the matching "buy" event is sent by a different person
    (person3) than the first two steps (person2).
    NOTE(review): which org maps to which industry is presumably set in
    ``_create_groups`` -- confirm there.
    """
    self._create_groups()

    def group_event(name, when, org, browser):
        # Every event in this test carries the same two properties, in this order.
        return {"event": name, "timestamp": when, "properties": {"$group_0": org, "$browser": browser}}

    journeys_for(
        {
            "person1": [
                group_event("sign up", datetime(2020, 1, 1, 12), "org:5", "Chrome"),
                group_event("play movie", datetime(2020, 1, 1, 13), "org:5", "Chrome"),
                group_event("buy", datetime(2020, 1, 1, 15), "org:5", "Chrome"),
            ],
            "person2": [
                group_event("sign up", datetime(2020, 1, 2, 14), "org:6", "Safari"),
                group_event("play movie", datetime(2020, 1, 2, 16), "org:6", "Safari"),
            ],
            "person3": [
                group_event("buy", datetime(2020, 1, 2, 18), "org:6", "Safari"),
            ],
        },
        self.team,
    )

    filters = {
        "events": [
            {"id": "sign up", "order": 0},
            {"id": "play movie", "order": 1},
            {"id": "buy", "order": 2},
        ],
        "insight": INSIGHT_FUNNELS,
        "date_from": "2020-01-01",
        "date_to": "2020-01-08",
        "funnel_window_days": 7,
        "breakdown": "industry",
        "breakdown_type": "group",
        "breakdown_group_type_index": 0,
        "aggregation_group_type_index": 0,
    }

    runner = FunnelsQueryRunner(query=cast(FunnelsQuery, filter_to_query(filters)), team=self.team)
    results = runner.calculate().results

    # (index into results, industry, seconds to reach "play movie");
    # the "buy" step is expected at 7200.0 seconds for both industries.
    expectations = [(0, "finance", 3600.0), (1, "technology", 7200.0)]

    for position, industry, play_seconds in expectations:
        self._assert_funnel_breakdown_result_is_correct(
            results[position],
            [
                FunnelStepResult(name="sign up", breakdown=industry, count=1),
                FunnelStepResult(
                    name="play movie",
                    breakdown=industry,
                    count=1,
                    average_conversion_time=play_seconds,
                    median_conversion_time=play_seconds,
                ),
                FunnelStepResult(
                    name="buy",
                    breakdown=industry,
                    count=1,
                    average_conversion_time=7200.0,
                    median_conversion_time=7200.0,
                ),
            ],
        )
def sort_breakdown_funnel_results(results: List[List[Dict[str, Any]]]) -> List[List[Dict[str, Any]]]:
    """Return the breakdown funnel results ordered by their breakdown value.

    Each result is indexed as ``r[0]["breakdown_value"]``, i.e. the sort key
    is the ``breakdown_value`` of the first step of every result.
    """
    # sorted() already returns a new list; no extra list() copy is needed.
    return sorted(results, key=lambda r: r[0]["breakdown_value"])


def assert_funnel_results_equal(left: List[Dict[str, Any]], right: List[Dict[str, Any]]):
    """
    Helper to be able to compare two funnel results, but exclude people urls
    from the comparison, as these include:

        1. all the params from the request, and will thus almost always be
           different for varying inputs
        2. contain timestamps which are not stable across runs

    Raises:
        AssertionError: if the results differ in length, if a pair of steps
            has different keys, or if any value differs. For a differing
            value the offending key and both values are appended to the
            exception's ``args``.
    """

    def _filter(steps: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        # Blank out the people urls so they never take part in the comparison.
        return [{**step, "converted_people_url": None, "dropped_people_url": None} for step in steps]

    assert len(left) == len(right)

    # Normalise each side exactly once and walk both in lockstep, instead of
    # re-filtering the whole right-hand side for every step.
    for item, other in zip(_filter(left), _filter(right)):
        assert item.keys() == other.keys()
        for key in item:
            try:
                assert item[key] == other[key]
            except AssertionError as e:
                # Enrich the failure with the key and both values before re-raising.
                e.args += (
                    f"failed comparing {key}",
                    f'Got "{item[key]}" and "{other[key]}"',
                )
                raise
ClickhouseTestMixin, + funnel_breakdown_test_factory( # type: ignore + FunnelOrderType.ORDERED, + ClickhouseFunnelActors, + _create_action, + _create_person, + ), +): + maxDiff = None + pass class TestFunnelConversionTime( @@ -3556,7 +3557,7 @@ def test_smoke(self): FROM events AS e WHERE - and(greaterOrEquals(e.timestamp, toDateTime('2024-01-03 00:00:00.000000')), lessOrEquals(e.timestamp, toDateTime('2024-01-10 23:59:59.999999'))))) + and(and(greaterOrEquals(e.timestamp, toDateTime('2024-01-03 00:00:00.000000')), lessOrEquals(e.timestamp, toDateTime('2024-01-10 23:59:59.999999'))), or(equals(step_0, 1), equals(step_1, 1))))) WHERE equals(step_0, 1) LIMIT 100""", @@ -3616,7 +3617,7 @@ def test_smoke(self): FROM events AS e WHERE - and(greaterOrEquals(e.timestamp, toDateTime('2024-01-03 00:00:00.000000')), lessOrEquals(e.timestamp, toDateTime('2024-01-10 23:59:59.999999'))))) + and(and(greaterOrEquals(e.timestamp, toDateTime('2024-01-03 00:00:00.000000')), lessOrEquals(e.timestamp, toDateTime('2024-01-10 23:59:59.999999'))), or(equals(step_0, 1), equals(step_1, 1))))) WHERE equals(step_0, 1))) GROUP BY @@ -3687,7 +3688,7 @@ def test_smoke(self): FROM events AS e WHERE - and(greaterOrEquals(e.timestamp, toDateTime('2024-01-03 00:00:00.000000')), lessOrEquals(e.timestamp, toDateTime('2024-01-10 23:59:59.999999'))))) + and(and(greaterOrEquals(e.timestamp, toDateTime('2024-01-03 00:00:00.000000')), lessOrEquals(e.timestamp, toDateTime('2024-01-10 23:59:59.999999'))), or(equals(step_0, 1), equals(step_1, 1))))) WHERE equals(step_0, 1))) GROUP BY diff --git a/posthog/hogql_queries/insights/funnels/utils.py b/posthog/hogql_queries/insights/funnels/utils.py index d5e21e219309f..2b36b2252cf78 100644 --- a/posthog/hogql_queries/insights/funnels/utils.py +++ b/posthog/hogql_queries/insights/funnels/utils.py @@ -1,4 +1,7 @@ +from typing import List from posthog.constants import FUNNEL_WINDOW_INTERVAL_TYPES +from posthog.hogql import ast +from posthog.hogql.parser import 
parse_expr from posthog.schema import FunnelConversionWindowTimeUnit, FunnelsFilter, StepOrderValue from rest_framework.exceptions import ValidationError @@ -38,3 +41,27 @@ def funnel_window_interval_unit_to_sql( return "DAY" else: raise ValidationError("{funnelWindowIntervalUnit} not supported") + + +def get_breakdown_expr( + breakdown: List[str | int] | None, properties_column: str, normalize_url: bool | None = False +) -> ast.Expr: + if isinstance(breakdown, str) or isinstance(breakdown, int) or breakdown is None: + return parse_expr(f"ifNull({properties_column}.{breakdown}, '')") + else: + exprs = [] + for b in breakdown: + expr = parse_expr(normalize_url_breakdown(f"ifNull({properties_column}.{b}, '')", normalize_url)) + exprs.append(expr) + expression = ast.Array(exprs=exprs) + + return expression + + +def normalize_url_breakdown(breakdown_value, breakdown_normalize_url: bool | None): + if breakdown_normalize_url: + return ( + f"if( empty(trim(TRAILING '/?#' from {breakdown_value})), '/', trim(TRAILING '/?#' from {breakdown_value}))" + ) + + return breakdown_value diff --git a/posthog/hogql_queries/legacy_compatibility/filter_to_query.py b/posthog/hogql_queries/legacy_compatibility/filter_to_query.py index 536f741a3f4b0..fc68d3f26c2f4 100644 --- a/posthog/hogql_queries/legacy_compatibility/filter_to_query.py +++ b/posthog/hogql_queries/legacy_compatibility/filter_to_query.py @@ -337,9 +337,10 @@ def _breakdown_filter(_filter: Dict): "breakdown_normalize_url": _filter.get("breakdown_normalize_url"), "breakdown_group_type_index": _filter.get("breakdown_group_type_index"), "breakdown_hide_other_aggregation": _filter.get("breakdown_hide_other_aggregation"), - "breakdown_histogram_bin_count": _filter.get("breakdown_histogram_bin_count") - if _insight_type(_filter) == "TRENDS" - else None, + "breakdown_histogram_bin_count": ( + _filter.get("breakdown_histogram_bin_count") if _insight_type(_filter) == "TRENDS" else None + ), + "breakdown_limit": 
_filter.get("breakdown_limit"), } # fix breakdown typo diff --git a/posthog/queries/funnels/base.py b/posthog/queries/funnels/base.py index 7e60b4fe87ca5..e52880f6a091d 100644 --- a/posthog/queries/funnels/base.py +++ b/posthog/queries/funnels/base.py @@ -499,14 +499,14 @@ def _add_breakdown_attribution_subquery(self, inner_query: str) -> str: if self._query_has_array_breakdown(): default_breakdown_value = f"""[{','.join(["''" for _ in range(len(self._filter.breakdown or []))])}]""" # default is [''] when dealing with a single breakdown array, otherwise ['', '', ...., ''] - breakdown_selelector = ( + breakdown_selector = ( f"if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, {default_breakdown_value})" ) else: - breakdown_selelector = "prop_vals" + breakdown_selector = "prop_vals" return f""" - SELECT *, {breakdown_selelector} as prop + SELECT *, {breakdown_selector} as prop FROM ({inner_query}) """