diff --git a/frontend/__snapshots__/scenes-app-insights--user-paths--light--webkit.png b/frontend/__snapshots__/scenes-app-insights--user-paths--light--webkit.png index 8e07fa5274c2fa..77e11428b25bf0 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--user-paths--light--webkit.png and b/frontend/__snapshots__/scenes-app-insights--user-paths--light--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights-error-empty-states--server-error--dark.png b/frontend/__snapshots__/scenes-app-insights-error-empty-states--server-error--dark.png index 582ed9c4e28b76..1fce3e88458830 100644 Binary files a/frontend/__snapshots__/scenes-app-insights-error-empty-states--server-error--dark.png and b/frontend/__snapshots__/scenes-app-insights-error-empty-states--server-error--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-insights-error-empty-states--server-error--light.png b/frontend/__snapshots__/scenes-app-insights-error-empty-states--server-error--light.png index f50a6c1cbb7fa3..dda9a3c53e4285 100644 Binary files a/frontend/__snapshots__/scenes-app-insights-error-empty-states--server-error--light.png and b/frontend/__snapshots__/scenes-app-insights-error-empty-states--server-error--light.png differ diff --git a/frontend/__snapshots__/scenes-app-persons-groups--persons--light.png b/frontend/__snapshots__/scenes-app-persons-groups--persons--light.png index 3547135528e9ac..2bdb185b0f26d8 100644 Binary files a/frontend/__snapshots__/scenes-app-persons-groups--persons--light.png and b/frontend/__snapshots__/scenes-app-persons-groups--persons--light.png differ diff --git a/frontend/__snapshots__/scenes-app-pipeline--pipeline-landing-page--light.png b/frontend/__snapshots__/scenes-app-pipeline--pipeline-landing-page--light.png index 39a67398c983de..2a142d7d455baf 100644 Binary files a/frontend/__snapshots__/scenes-app-pipeline--pipeline-landing-page--light.png and b/frontend/__snapshots__/scenes-app-pipeline--pipeline-landing-page--light.png 
differ diff --git a/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-configuration--light.png b/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-configuration--light.png index 02c6b7bb199bc1..f38a0670d7a7c2 100644 Binary files a/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-configuration--light.png and b/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-configuration--light.png differ diff --git a/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-metrics--light.png b/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-metrics--light.png index 851204e36f6262..633b3d2ba6e326 100644 Binary files a/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-metrics--light.png and b/frontend/__snapshots__/scenes-app-pipeline--pipeline-node-metrics--light.png differ diff --git a/frontend/__snapshots__/scenes-other-settings--settings-organization--light.png b/frontend/__snapshots__/scenes-other-settings--settings-organization--light.png index fdc41722b4cad1..e533c4ecc91615 100644 Binary files a/frontend/__snapshots__/scenes-other-settings--settings-organization--light.png and b/frontend/__snapshots__/scenes-other-settings--settings-organization--light.png differ diff --git a/frontend/__snapshots__/scenes-other-settings--settings-project--light.png b/frontend/__snapshots__/scenes-other-settings--settings-project--light.png index cafcc815f0758c..beaf522b7f48f7 100644 Binary files a/frontend/__snapshots__/scenes-other-settings--settings-project--light.png and b/frontend/__snapshots__/scenes-other-settings--settings-project--light.png differ diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index 4cd9c29387ac42..b1765eb5896eaf 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -1675,6 +1675,12 @@ "required": ["average_conversion_time", "bins"], "type": "object" }, + "FunnelTrendsResults": { + "items": { + "type": "object" + }, + "type": "array" + }, 
"FunnelVizType": { "enum": ["steps", "time_to_convert", "trends"], "type": "string" @@ -1873,6 +1879,9 @@ }, { "$ref": "#/definitions/FunnelTimeToConvertResults" + }, + { + "$ref": "#/definitions/FunnelTrendsResults" } ] }, diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 85e5e806fb5588..1b29475c343498 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -705,8 +705,9 @@ export type FunnelTimeToConvertResults = { average_conversion_time: number bins: [BinNumber, BinNumber][] } +export type FunnelTrendsResults = Record[] export interface FunnelsQueryResponse extends QueryResponse { - results: FunnelStepsResults | FunnelStepsBreakdownResults | FunnelTimeToConvertResults + results: FunnelStepsResults | FunnelStepsBreakdownResults | FunnelTimeToConvertResults | FunnelTrendsResults } /** `RetentionFilterType` minus everything inherited from `FilterType` */ diff --git a/posthog/hogql_queries/insights/funnels/__init__.py b/posthog/hogql_queries/insights/funnels/__init__.py index c189ae7cfe2e3b..2e3275ff7fdfeb 100644 --- a/posthog/hogql_queries/insights/funnels/__init__.py +++ b/posthog/hogql_queries/insights/funnels/__init__.py @@ -3,3 +3,4 @@ from .funnel_strict import FunnelStrict from .funnel_unordered import FunnelUnordered from .funnel_time_to_convert import FunnelTimeToConvert +from .funnel_trends import FunnelTrends diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index f742d17e767ffc..91f08cf6360849 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -39,10 +39,7 @@ class FunnelBase(ABC): _extra_event_fields: List[ColumnName] _extra_event_properties: List[PropertyName] - def __init__( - self, - context: FunnelQueryContext, - ): + def __init__(self, context: FunnelQueryContext): self.context = context self._extra_event_fields: List[ColumnName] = [] @@ -56,10 +53,10 @@ def 
__init__( def get_query(self) -> ast.SelectQuery: raise NotImplementedError() - def get_step_counts_query(self) -> str: + def get_step_counts_query(self) -> ast.SelectQuery: raise NotImplementedError() - def get_step_counts_without_aggregation_query(self) -> str: + def get_step_counts_without_aggregation_query(self) -> ast.SelectQuery: raise NotImplementedError() @cached_property diff --git a/posthog/hogql_queries/insights/funnels/funnel_query_context.py b/posthog/hogql_queries/insights/funnels/funnel_query_context.py index db5fc7e7a17def..b373786753d57a 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_query_context.py +++ b/posthog/hogql_queries/insights/funnels/funnel_query_context.py @@ -13,6 +13,7 @@ FunnelsFilter, FunnelsQuery, HogQLQueryModifiers, + IntervalType, ) @@ -21,6 +22,8 @@ class FunnelQueryContext(QueryContext): funnelsFilter: FunnelsFilter breakdownFilter: BreakdownFilter + interval: IntervalType + breakdown: List[Union[str, int]] | None breakdownType: BreakdownType breakdownAttributionType: BreakdownAttributionType @@ -42,6 +45,8 @@ def __init__( self.breakdownFilter = self.query.breakdownFilter or BreakdownFilter() # defaults + self.interval = self.query.interval or IntervalType.day + self.breakdownType = self.breakdownFilter.breakdown_type or BreakdownType.event self.breakdownAttributionType = ( self.funnelsFilter.breakdownAttributionType or BreakdownAttributionType.first_touch diff --git a/posthog/hogql_queries/insights/funnels/funnel_trends.py b/posthog/hogql_queries/insights/funnels/funnel_trends.py new file mode 100644 index 00000000000000..834c9e1e23f207 --- /dev/null +++ b/posthog/hogql_queries/insights/funnels/funnel_trends.py @@ -0,0 +1,321 @@ +from datetime import datetime +from itertools import groupby +from typing import Any, Dict, List, Optional, Tuple +from posthog.hogql import ast +from posthog.hogql.parser import parse_expr +from posthog.hogql_queries.insights.funnels.base import FunnelBase +from 
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"
HUMAN_READABLE_TIMESTAMP_FORMAT = "%-d-%b-%Y"


class FunnelTrends(FunnelBase):
    """
    ## Funnel trends assumptions

    Funnel trends are a graph of conversion over time – meaning a Y ({conversion_rate}) for each X ({entrance_period}).

    ### What is {entrance_period}?

    A funnel is considered entered by a user when they have performed its first step.
    When that happens, we consider that an entrance of funnel.

    Now, our time series is based on a sequence of {entrance_period}s, each starting at {entrance_period_start}
    and ending _right before the next_ {entrance_period_start}. A person is then counted at most once in each
    {entrance_period}.

    ### What is {conversion_rate}?

    Each time a funnel is entered by a person, they have exactly {funnel_window_interval} {funnel_window_interval_unit} to go
    through the funnel's steps. Later events are just not taken into account.

    For {conversion_rate}, we need to know reference steps: {from_step} and {to_step}.
    By default they are respectively the first and the last steps of the funnel.

    Then for each {entrance_period} we calculate {reached_from_step_count} – the number of persons
    who entered the funnel and reached step {from_step} (along with all the steps leading up to it, if there are any).
    Similarly we calculate {reached_to_step_count}, which is the number of persons from {reached_from_step_count}
    who also reached step {to_step} (along with all the steps leading up to it, including of course step {from_step}).

    {conversion_rate} is simply {reached_to_step_count} divided by {reached_from_step_count},
    multiplied by 100 to be a percentage.

    If no people have reached step {from_step} in the period, {conversion_rate} is zero.
    """

    # When set to True, `_format_results` returns the flat per-period rows instead of
    # the chart-shaped summary produced by `_format_summarized_results`.
    just_summarize = False

    def __init__(self, context: FunnelQueryContext, just_summarize: bool = False):
        """Store the query context and build the ordering-specific funnel used for step counts.

        Args:
            context: Shared funnel query context (team, query, filters, interval, ...).
            just_summarize: If True, `_format_results` skips the chart-shaped aggregation.
        """
        super().__init__(context)

        self.just_summarize = just_summarize
        self.funnel_order = get_funnel_order_class(self.context.funnelsFilter)(context=self.context)

    def _format_results(self, results) -> List[Dict[str, Any]]:
        """Turn raw result rows into per-period dicts, optionally summarized for charting.

        Rows are read positionally: index 0 is the period timestamp, 1 and 2 are the
        from/to step counts (corrected for sampling), 3 is the conversion rate. When a
        breakdown is active the last column is the breakdown value; anything that is
        not a string (or list of strings) is looked up as a `Cohort` primary key and
        replaced by the cohort's name.
        """
        sampling_factor = self.context.query.samplingFactor
        has_breakdown = self._get_breakdown_prop()

        # Cohort names resolved so far. The same cohort id shows up once per period,
        # so caching avoids issuing one database query per result row.
        cohort_names: Dict[Any, str] = {}

        summary = []
        for period_row in results:
            serialized_result = {
                "timestamp": period_row[0],
                "reached_from_step_count": correct_result_for_sampling(period_row[1], sampling_factor),
                "reached_to_step_count": correct_result_for_sampling(period_row[2], sampling_factor),
                "conversion_rate": period_row[3],
            }

            if has_breakdown:
                raw_value = period_row[-1]
                is_plain_value = isinstance(raw_value, str) or (
                    isinstance(raw_value, list) and all(isinstance(item, str) for item in raw_value)
                )
                if is_plain_value:
                    serialized_result["breakdown_value"] = raw_value
                else:
                    if raw_value not in cohort_names:
                        cohort_names[raw_value] = Cohort.objects.get(pk=raw_value).name
                    serialized_result["breakdown_value"] = cohort_names[raw_value]

            summary.append(serialized_result)

        # Deliberately an identity check: only an explicit `False` triggers summarizing.
        if self.just_summarize is False:
            return self._format_summarized_results(summary)
        return summary

    def _format_summarized_results(self, summary):
        """Shape per-period rows into chart series, one per breakdown value if a breakdown is set.

        Without a breakdown a single-element list is returned. With a breakdown, rows
        are grouped by their "breakdown_value" and each group yields one series that
        additionally carries that "breakdown_value".
        """
        if not self.context.breakdown:
            return [self._format_single_summary(summary)]

        def breakdown_key(row):
            return row["breakdown_value"]

        # `groupby` only merges adjacent items, so the rows must be sorted by the same key first.
        return [
            {**self._format_single_summary(list(rows)), "breakdown_value": key}
            for key, rows in groupby(sorted(summary, key=breakdown_key), breakdown_key)
        ]

    def _format_single_summary(self, summary):
        """Build one chart series (`count`, `data`, `days`, `labels`) from per-period rows."""
        interval = self.context.interval

        # The format only depends on the interval, so pick it once instead of per row.
        # Hourly series keep the time of day; all other intervals use the date only.
        day_format = TIMESTAMP_FORMAT if interval.value == "hour" else "%Y-%m-%d"

        data = []
        days = []
        labels = []
        for row in summary:
            timestamp: datetime = row["timestamp"]
            data.append(row["conversion_rate"])
            days.append(timestamp.strftime(day_format))
            labels.append(timestamp.strftime(HUMAN_READABLE_TIMESTAMP_FORMAT))
        return {"count": len(summary), "data": data, "days": days, "labels": labels}

    def get_query(self) -> ast.SelectQuery:
        """Build the funnel trends query.

        The result is `data RIGHT OUTER JOIN fill`: `data` aggregates the per-person
        step counts into counts per entrance period (and breakdown value), while
        `fill` generates one row for every period in the date range (and every
        breakdown value) so that periods without entrances still appear.
        """
        team, interval, query, now = self.context.team, self.context.interval, self.context.query, self.context.now

        date_range = QueryDateRange(
            date_range=query.dateRange,
            team=team,
            interval=query.interval,
            now=now,
        )

        step_counts = self.get_step_counts_without_aggregation_query()
        # Expects multiple rows for same person, first event time, steps taken.

        (
            reached_from_step_count_condition,
            reached_to_step_count_condition,
            _,
        ) = self.get_steps_reached_conditions()
        interval_func = get_interval_func_ch(interval.value)

        # Fall back to the team's earliest event when the range has no lower bound.
        _date_from = date_range.date_from()
        if _date_from is None:
            _date_from = get_earliest_timestamp(team.pk)

        breakdown_clause = self._get_breakdown_prop_expr()
        has_breakdown = len(breakdown_clause) > 0

        data_select: List[ast.Expr] = [
            ast.Field(chain=["entrance_period_start"]),
            parse_expr(f"countIf({reached_from_step_count_condition}) AS reached_from_step_count"),
            parse_expr(f"countIf({reached_to_step_count_condition}) AS reached_to_step_count"),
            *breakdown_clause,
        ]

        # NOTE(review): these are one-element tuples (note the trailing commas), not plain
        # strings. They are kept as-is because the generated SQL, and the snapshots taken
        # from it, reflect the tuple form - confirm before simplifying to a string constant.
        formatted_date_from = (_date_from.strftime(TIMESTAMP_FORMAT),)
        formatted_date_to = (date_range.date_to().strftime(TIMESTAMP_FORMAT),)
        date_from_as_hogql = ast.Call(
            name="assumeNotNull",
            args=[ast.Call(name="toDateTime", args=[(ast.Constant(value=formatted_date_from))])],
        )
        date_to_as_hogql = ast.Call(
            name="assumeNotNull",
            args=[ast.Call(name="toDateTime", args=[(ast.Constant(value=formatted_date_to))])],
        )
        data_select_from = ast.JoinExpr(table=step_counts)
        data_group_by: List[ast.Expr] = [ast.Field(chain=["entrance_period_start"]), *breakdown_clause]
        data_query = ast.SelectQuery(select=data_select, select_from=data_select_from, group_by=data_group_by)

        # `fill` enumerates every period start: start of the first interval plus N intervals.
        fill_select: List[ast.Expr] = [
            ast.Alias(
                alias="entrance_period_start",
                expr=ast.ArithmeticOperation(
                    left=get_start_of_interval_hogql(interval.value, team=team, source=date_from_as_hogql),
                    right=ast.Call(name=interval_func, args=[ast.Field(chain=["number"])]),
                    op=ast.ArithmeticOperationOp.Add,
                ),
            ),
            *([parse_expr("breakdown_value as prop")] if has_breakdown else []),
        ]
        # numbers(dateDiff(interval, start, end) + 1): one offset per period, both ends included.
        fill_select_from = ast.JoinExpr(
            table=ast.Field(chain=["numbers"]),
            table_args=[
                ast.ArithmeticOperation(
                    left=ast.Call(
                        name="dateDiff",
                        args=[
                            ast.Constant(value=interval.value),
                            get_start_of_interval_hogql(interval.value, team=team, source=date_from_as_hogql),
                            get_start_of_interval_hogql(interval.value, team=team, source=date_to_as_hogql),
                        ],
                    ),
                    right=ast.Constant(value=1),
                    op=ast.ArithmeticOperationOp.Add,
                )
            ],
            alias="period_offsets",
        )
        # With a breakdown, every period is repeated for every breakdown value via ARRAY JOIN.
        fill_query = ast.SelectQuery(
            select=fill_select,
            select_from=fill_select_from,
            array_join_op="ARRAY JOIN" if has_breakdown else None,
            array_join_list=(
                [
                    ast.Alias(
                        alias="breakdown_value",
                        expr=ast.Array(exprs=[parse_expr(str(value)) for value in self.breakdown_values]),
                        hidden=False,
                    )
                ]
                if has_breakdown
                else None
            ),
        )

        breakdown_field: Optional[ast.Field] = None
        fill_breakdown_join_constraint: List[ast.Expr] = []
        if has_breakdown:
            # can only be a field here, since group_remaining is false
            breakdown_field = breakdown_clause[0]  # type: ignore
            fill_breakdown_join_constraint = [
                ast.CompareOperation(
                    left=ast.Field(chain=["data", *breakdown_field.chain]),
                    right=ast.Field(chain=["fill", *breakdown_field.chain]),
                    op=ast.CompareOperationOp.Eq,
                )
            ]
        fill_join = ast.JoinExpr(
            table=fill_query,
            alias="fill",
            join_type="RIGHT OUTER JOIN",
            constraint=ast.JoinConstraint(
                expr=ast.And(
                    exprs=[
                        ast.CompareOperation(
                            left=ast.Field(chain=["data", "entrance_period_start"]),
                            right=ast.Field(chain=["fill", "entrance_period_start"]),
                            op=ast.CompareOperationOp.Eq,
                        ),
                        *fill_breakdown_join_constraint,
                    ]
                )
            ),
        )

        select: List[ast.Expr] = [
            ast.Field(chain=["fill", "entrance_period_start"]),
            ast.Field(chain=["reached_from_step_count"]),
            ast.Field(chain=["reached_to_step_count"]),
            parse_expr(
                "if(reached_from_step_count > 0, round(reached_to_step_count / reached_from_step_count * 100, 2), 0) AS conversion_rate"
            ),
            *([ast.Field(chain=["fill", *breakdown_field.chain])] if breakdown_field is not None else []),
        ]
        select_from = ast.JoinExpr(
            table=data_query,
            alias="data",
            next_join=fill_join,
        )
        order_by: List[ast.OrderExpr] = [
            ast.OrderExpr(expr=ast.Field(chain=["fill", "entrance_period_start"]), order="ASC")
        ]

        return ast.SelectQuery(
            select=select,
            select_from=select_from,
            order_by=order_by,
            limit=ast.Constant(value=1_000),  # increased limit (default 100) for hourly breakdown
        )

    def get_step_counts_without_aggregation_query(
        self, *, specific_entrance_period_start: Optional[datetime] = None
    ) -> ast.SelectQuery:
        """Build the per-person query: furthest step reached per person and entrance period.

        Wraps the ordering-specific step counts query and groups it by aggregation
        target, entrance period start and (if present) the breakdown expressions.

        Args:
            specific_entrance_period_start: Currently unused (see TODO below).
        """
        team, interval = self.context.team, self.context.interval

        steps_per_person_query = self.funnel_order.get_step_counts_without_aggregation_query()

        # TODO: not ported yet -
        #   * restricting the result to a single period via `specific_entrance_period_start`
        #     (a WHERE on entrance_period_start; used when only one data point is needed,
        #     e.g. persons per data point),
        #   * selecting the matching event arrays when recordings are requested.

        breakdown_clause = self._get_breakdown_prop_expr()

        select: List[ast.Expr] = [
            ast.Field(chain=["aggregation_target"]),
            ast.Alias(alias="entrance_period_start", expr=get_start_of_interval_hogql(interval.value, team=team)),
            parse_expr("max(steps) AS steps_completed"),
            *breakdown_clause,
        ]
        select_from = ast.JoinExpr(table=steps_per_person_query)
        group_by: List[ast.Expr] = [
            ast.Field(chain=["aggregation_target"]),
            ast.Field(chain=["entrance_period_start"]),
            *breakdown_clause,
        ]

        return ast.SelectQuery(select=select, select_from=select_from, group_by=group_by)

    def get_steps_reached_conditions(self) -> Tuple[str, str, str]:
        """Return the SQL conditions on `steps_completed` used for the trend counts.

        Returns:
            A tuple of (reached from-step, reached to-step, reached from-step but not
            to-step) condition strings.
        """
        funnelsFilter, max_steps = self.context.funnelsFilter, self.context.max_steps

        # How many steps must have been done to count for the denominator of a funnel trends data point
        from_step = funnelsFilter.funnelFromStep or 0
        # How many steps must have been done to count for the numerator of a funnel trends data point.
        # Note that a falsy `funnelToStep` (None or 0) selects the last step.
        to_step = funnelsFilter.funnelToStep or max_steps - 1

        # Those who converted OR dropped off
        reached_from_step_count_condition = f"steps_completed >= {from_step+1}"
        # Those who converted
        reached_to_step_count_condition = f"steps_completed >= {to_step+1}"
        # Those who dropped off
        did_not_reach_to_step_count_condition = f"{reached_from_step_count_condition} AND steps_completed < {to_step+1}"
        return (
            reached_from_step_count_condition,
            reached_to_step_count_condition,
            did_not_reach_to_step_count_condition,
        )
_is_stale(self, cached_result_package): date_to = self.query_date_range.date_to() @@ -104,8 +107,7 @@ def funnel_class(self): funnelVizType = self.context.funnelsFilter.funnelVizType if funnelVizType == FunnelVizType.trends: - # return FunnelTrends(context=self.context) - return self.funnel_order_class + return FunnelTrends(context=self.context, **self.kwargs) elif funnelVizType == FunnelVizType.time_to_convert: return FunnelTimeToConvert(context=self.context) else: diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends.ambr new file mode 100644 index 00000000000000..5fa91548e3037f --- /dev/null +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends.ambr @@ -0,0 +1,348 @@ +# serializer version: 1 +# name: TestFunnelTrends.test_timezones_trends + ''' + SELECT fill.entrance_period_start AS entrance_period_start, + data.reached_from_step_count AS reached_from_step_count, + data.reached_to_step_count AS reached_to_step_count, + if(ifNull(greater(data.reached_from_step_count, 0), 0), round(multiply(divide(data.reached_to_step_count, data.reached_from_step_count), 100), 2), 0) AS conversion_rate + FROM + (SELECT entrance_period_start AS entrance_period_start, + countIf(ifNull(greaterOrEquals(steps_completed, 1), 0)) AS reached_from_step_count, + countIf(ifNull(greaterOrEquals(steps_completed, 3), 0)) AS reached_to_step_count + FROM + (SELECT aggregation_target AS aggregation_target, + toStartOfDay(timestamp) AS entrance_period_start, + max(steps) AS steps_completed + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), 
ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'step one'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'step two'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + 
if(equals(e.event, 'step three'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2 + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-04-30 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-07 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0)) + GROUP BY aggregation_target, + entrance_period_start) + GROUP BY entrance_period_start) AS data + RIGHT OUTER JOIN + (SELECT plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(('2021-04-30 00:00:00'), 6, 'UTC'))), toIntervalDay(period_offsets.number)) AS entrance_period_start + FROM numbers(plus(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(('2021-04-30 00:00:00'), 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(('2021-05-07 23:59:59'), 6, 'UTC')))), 1)) AS period_offsets) AS fill ON equals(data.entrance_period_start, fill.entrance_period_start) + ORDER BY fill.entrance_period_start ASC + LIMIT 1000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelTrends.test_timezones_trends.1 + ''' + SELECT fill.entrance_period_start AS entrance_period_start, + data.reached_from_step_count AS reached_from_step_count, + data.reached_to_step_count AS 
reached_to_step_count, + if(ifNull(greater(data.reached_from_step_count, 0), 0), round(multiply(divide(data.reached_to_step_count, data.reached_from_step_count), 100), 2), 0) AS conversion_rate + FROM + (SELECT entrance_period_start AS entrance_period_start, + countIf(ifNull(greaterOrEquals(steps_completed, 1), 0)) AS reached_from_step_count, + countIf(ifNull(greaterOrEquals(steps_completed, 3), 0)) AS reached_to_step_count + FROM + (SELECT aggregation_target AS aggregation_target, + toStartOfDay(timestamp) AS entrance_period_start, + max(steps) AS steps_completed + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS 
step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT toTimeZone(e.timestamp, 'US/Pacific') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'step one'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'step two'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'step three'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2 + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2021-04-30 00:00:00.000000', 6, 'US/Pacific')), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2021-05-07 23:59:59.999999', 6, 'US/Pacific'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0)) + GROUP BY aggregation_target, + 
entrance_period_start) + GROUP BY entrance_period_start) AS data + RIGHT OUTER JOIN + (SELECT plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(('2021-04-30 00:00:00'), 6, 'US/Pacific'))), toIntervalDay(period_offsets.number)) AS entrance_period_start + FROM numbers(plus(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(('2021-04-30 00:00:00'), 6, 'US/Pacific'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(('2021-05-07 23:59:59'), 6, 'US/Pacific')))), 1)) AS period_offsets) AS fill ON equals(data.entrance_period_start, fill.entrance_period_start) + ORDER BY fill.entrance_period_start ASC + LIMIT 1000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelTrends.test_week_interval + ''' + SELECT fill.entrance_period_start AS entrance_period_start, + data.reached_from_step_count AS reached_from_step_count, + data.reached_to_step_count AS reached_to_step_count, + if(ifNull(greater(data.reached_from_step_count, 0), 0), round(multiply(divide(data.reached_to_step_count, data.reached_from_step_count), 100), 2), 0) AS conversion_rate + FROM + (SELECT entrance_period_start AS entrance_period_start, + countIf(ifNull(greaterOrEquals(steps_completed, 1), 0)) AS reached_from_step_count, + countIf(ifNull(greaterOrEquals(steps_completed, 3), 0)) AS reached_to_step_count + FROM + (SELECT aggregation_target AS aggregation_target, + toStartOfWeek(timestamp, 0) AS entrance_period_start, + max(steps) AS steps_completed + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, 
if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'step one'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'step two'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'step three'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2 + FROM 
events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-07 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0)) + GROUP BY aggregation_target, + entrance_period_start) + GROUP BY entrance_period_start) AS data + RIGHT OUTER JOIN + (SELECT plus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull(('2021-05-01 00:00:00'), 6, 'UTC')), 0), toIntervalWeek(period_offsets.number)) AS entrance_period_start + FROM numbers(plus(dateDiff('week', toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull(('2021-05-01 00:00:00'), 6, 'UTC')), 0), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull(('2021-05-07 23:59:59'), 6, 'UTC')), 0)), 1)) AS period_offsets) AS fill ON equals(data.entrance_period_start, fill.entrance_period_start) + ORDER BY fill.entrance_period_start ASC + LIMIT 1000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelTrends.test_week_interval.1 + ''' + + SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target, + toStartOfWeek(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'), 0) AS entrance_period_start, + max(steps) AS steps_completed + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + 
INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + 
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 2 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 ) + WHERE toDateTime(entrance_period_start) = '2021-04-25 00:00:00' + GROUP BY aggregation_target, + entrance_period_start) + WHERE steps_completed >= 3 + ORDER BY aggregation_target + LIMIT 100 + OFFSET 0 SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000 + ''' +# --- diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_trends.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_trends.py new file mode 100644 index 00000000000000..6ca333b036f141 --- /dev/null +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_trends.py @@ -0,0 +1,1389 @@ +from datetime import date, datetime, timedelta +from typing import cast + +from zoneinfo import ZoneInfo +from freezegun.api import freeze_time + +from posthog.constants import INSIGHT_FUNNELS, TRENDS_LINEAR, FunnelOrderType +from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner +from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query +from posthog.models.cohort.cohort import Cohort +from posthog.models.filters import Filter +from posthog.queries.funnels.funnel_trends_persons import ClickhouseFunnelTrendsActors +from posthog.schema import FunnelsQuery +from posthog.test.base import ( + APIBaseTest, + ClickhouseTestMixin, + _create_person, + snapshot_clickhouse_queries, +) +from posthog.test.test_journeys import journeys_for + +FORMAT_TIME = "%Y-%m-%d %H:%M:%S" +FORMAT_TIME_DAY_END = "%Y-%m-%d 23:59:59" + + +class 
TestFunnelTrends(ClickhouseTestMixin, APIBaseTest): + maxDiff = None + + def _get_actors_at_step(self, filter, entrance_period_start, drop_off): + filter = Filter(data=filter, team=self.team) + person_filter = filter.shallow_clone({"entrance_period_start": entrance_period_start, "drop_off": drop_off}) + funnel_query_builder = ClickhouseFunnelTrendsActors(person_filter, self.team) + _, serialized_result, _ = funnel_query_builder.get_actors() + + return serialized_result + + def _create_sample_data(self): + # five people, three steps + journeys_for( + { + "user_one": [ + {"event": "step one", "timestamp": datetime(2021, 5, 1)}, + {"event": "step two", "timestamp": datetime(2021, 5, 3)}, + {"event": "step three", "timestamp": datetime(2021, 5, 5)}, + ], + "user_two": [ + {"event": "step one", "timestamp": datetime(2021, 5, 2)}, + {"event": "step two", "timestamp": datetime(2021, 5, 4)}, + ], + "user_three": [{"event": "step one", "timestamp": datetime(2021, 5, 6)}], + "user_four": [{"event": "step none", "timestamp": datetime(2021, 5, 6)}], + "user_five": [ + {"event": "step one", "timestamp": datetime(2021, 5, 1, 1)}, + {"event": "step two", "timestamp": datetime(2021, 5, 1, 2)}, + {"event": "step three", "timestamp": datetime(2021, 5, 1, 3)}, + ], + "user_six": [ + {"event": "step one", "timestamp": datetime(2021, 5, 1)}, + {"event": "step two", "timestamp": datetime(2021, 5, 3)}, + {"event": "step three", "timestamp": datetime(2021, 5, 5)}, + ], + "user_seven": [ + {"event": "step one", "timestamp": datetime(2021, 5, 2)}, + {"event": "step two", "timestamp": datetime(2021, 5, 4)}, + ], + "user_eight": [], + }, + self.team, + ) + + def test_no_event_in_period(self): + journeys_for( + {"user a": [{"event": "Step one", "timestamp": datetime(2021, 6, 6, 21)}]}, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-06-07 00:00:00", + "date_to": "2021-06-13 
23:59:59", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + # The only event created above is dated 2021-06-06, i.e. before date_from, + # so it falls outside the queried range. The result is still expected to hold + # one row per day of the range (7), with the first day labelled 2021-06-07. + query = cast(FunnelsQuery, filter_to_query(filters)) + runner = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True) + results = runner.calculate().results + formatted_results = runner.funnel_class._format_summarized_results(results) + + self.assertEqual(len(results), 7) + self.assertEqual(formatted_results[0]["days"][0], "2021-06-07") + + def test_only_one_user_reached_one_step(self): + """A single "step one" event on 2021-06-07 yields one entrant that day and no conversions.""" + journeys_for( + {"user a": [{"event": "step one", "timestamp": datetime(2021, 6, 7, 19)}]}, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-06-07 00:00:00", + "date_to": "2021-06-13 23:59:59", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + self.assertEqual( + results, + [ + { + "reached_to_step_count": 0, + "conversion_rate": 0, + "reached_from_step_count": 1, + "timestamp": datetime(2021, 6, 7, 0, 0).replace(tzinfo=ZoneInfo("UTC")), + }, + { + "reached_to_step_count": 0, + "conversion_rate": 0, + "reached_from_step_count": 0, + "timestamp": datetime(2021, 6, 8, 0, 0).replace(tzinfo=ZoneInfo("UTC")), + }, + { + "reached_to_step_count": 0, + "conversion_rate": 0, + "reached_from_step_count": 0, + "timestamp": datetime(2021, 6, 9, 0, 0).replace(tzinfo=ZoneInfo("UTC")), + }, + { + "reached_to_step_count": 0, + "conversion_rate": 0, 
+ "reached_from_step_count": 0, + "timestamp": datetime(2021, 6, 10, 0, 0).replace(tzinfo=ZoneInfo("UTC")), + }, + { + "reached_to_step_count": 0, + "conversion_rate": 0, + "reached_from_step_count": 0, + "timestamp": datetime(2021, 6, 11, 0, 0).replace(tzinfo=ZoneInfo("UTC")), + }, + { + "reached_to_step_count": 0, + "conversion_rate": 0, + "reached_from_step_count": 0, + "timestamp": datetime(2021, 6, 12, 0, 0).replace(tzinfo=ZoneInfo("UTC")), + }, + { + "reached_to_step_count": 0, + "conversion_rate": 0, + "reached_from_step_count": 0, + "timestamp": datetime(2021, 6, 13, 0, 0).replace(tzinfo=ZoneInfo("UTC")), + }, + ], + ) + + # 1 user who dropped off starting 2021-06-07 + funnel_trends_persons_existent_dropped_off_results = self._get_actors_at_step( + filters, "2021-06-07 00:00:00", True + ) + + self.assertEqual(len(funnel_trends_persons_existent_dropped_off_results), 1) + self.assertEqual( + [person["distinct_ids"] for person in funnel_trends_persons_existent_dropped_off_results], + [["user a"]], + ) + + # No users converted 2021-06-07 + funnel_trends_persons_nonexistent_converted_results = self._get_actors_at_step( + filters, "2021-06-07 00:00:00", False + ) + + self.assertEqual(len(funnel_trends_persons_nonexistent_converted_results), 0) + + # No users dropped off 2021-06-08 + funnel_trends_persons_nonexistent_dropped_off_results = self._get_actors_at_step( + filters, "2021-06-08 00:00:00", True + ) + + self.assertEqual(len(funnel_trends_persons_nonexistent_dropped_off_results), 0) + + # minute, hour, day, week, month + def test_hour_interval(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "hour", + "date_from": "2021-05-01 00:00:00", + "funnel_window_interval": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + with freeze_time("2021-05-06T23:40:59Z"): + query = cast(FunnelsQuery, 
filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + self.assertEqual(len(results), 144) + + def test_day_interval(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 00:00:00", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + journeys_for( + { + "user_one": [ + {"event": "step one", "timestamp": datetime(2021, 5, 1, 0)}, + {"event": "step two", "timestamp": datetime(2021, 5, 1, 1)}, + {"event": "step three", "timestamp": datetime(2021, 5, 1, 2)}, + ] + }, + self.team, + ) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + self.assertEqual(7, len(results)) + + persons = self._get_actors_at_step(filters, "2021-05-01 00:00:00", False) + + self.assertEqual([person["distinct_ids"] for person in persons], [["user_one"]]) + + @snapshot_clickhouse_queries + def test_week_interval(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "week", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 00:00:00", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + journeys_for( + { + "user_one": [ + {"event": "step one", "timestamp": datetime(2021, 5, 1, 0)}, + {"event": "step two", "timestamp": datetime(2021, 5, 1, 1)}, + {"event": "step three", "timestamp": datetime(2021, 5, 1, 2)}, + ] + }, + self.team, + ) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + 
persons = self._get_actors_at_step(filters, "2021-04-25 00:00:00", False) + + self.assertEqual(2, len(results)) + self.assertEqual([person["distinct_ids"] for person in persons], [["user_one"]]) + + def test_month_interval(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "month", + "date_from": "2020-01-01 00:00:00", + "date_to": "2020-07-01 00:00:00", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + journeys_for( + { + "user_one": [ + {"event": "step one", "timestamp": datetime(2020, 5, 1, 0)}, + {"event": "step two", "timestamp": datetime(2020, 5, 1, 1)}, + {"event": "step three", "timestamp": datetime(2020, 5, 1, 2)}, + ] + }, + self.team, + ) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + self.assertEqual( + results, + [ + { + "conversion_rate": 0.0, + "reached_from_step_count": 0, + "reached_to_step_count": 0, + "timestamp": date(2020, 1, 1), + }, + { + "conversion_rate": 0.0, + "reached_from_step_count": 0, + "reached_to_step_count": 0, + "timestamp": date(2020, 2, 1), + }, + { + "conversion_rate": 0.0, + "reached_from_step_count": 0, + "reached_to_step_count": 0, + "timestamp": date(2020, 3, 1), + }, + { + "conversion_rate": 0.0, + "reached_from_step_count": 0, + "reached_to_step_count": 0, + "timestamp": date(2020, 4, 1), + }, + { + "conversion_rate": 100.0, + "reached_from_step_count": 1, + "reached_to_step_count": 1, + "timestamp": date(2020, 5, 1), + }, + { + "conversion_rate": 0.0, + "reached_from_step_count": 0, + "reached_to_step_count": 0, + "timestamp": date(2020, 6, 1), + }, + { + "conversion_rate": 0.0, + "reached_from_step_count": 0, + "reached_to_step_count": 0, + "timestamp": date(2020, 7, 1), + }, + ], + ) + + persons = 
self._get_actors_at_step(filters, "2020-05-01 00:00:00", False) + + self.assertEqual([person["distinct_ids"] for person in persons], [["user_one"]]) + + def test_all_date_range(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "all", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + journeys_for( + { + "user_one": [ + {"event": "step one", "timestamp": datetime(2021, 5, 1, 0)}, + {"event": "step two", "timestamp": datetime(2021, 5, 1, 1)}, + {"event": "step three", "timestamp": datetime(2021, 5, 1, 2)}, + ] + }, + self.team, + ) + + with freeze_time("2021-05-20T13:01:01Z"): + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + self.assertEqual(20, len(results)) + + persons = self._get_actors_at_step(filters, "2021-05-01 00:00:00", False) + + self.assertEqual([person["distinct_ids"] for person in persons], [["user_one"]]) + + def test_all_results_for_day_interval(self): + self._create_sample_data() + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 00:00:00", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + saturday = results[0] # 5/1 + self.assertEqual(3, saturday["reached_to_step_count"]) + self.assertEqual(3, saturday["reached_from_step_count"]) + self.assertEqual(100, saturday["conversion_rate"]) + + sunday = results[1] # 5/2 + self.assertEqual(0, 
sunday["reached_to_step_count"]) + self.assertEqual(2, sunday["reached_from_step_count"]) + self.assertEqual(0, sunday["conversion_rate"]) + + monday = results[2] # 5/3 + self.assertEqual(0, monday["reached_to_step_count"]) + self.assertEqual(0, monday["reached_from_step_count"]) + self.assertEqual(0, monday["conversion_rate"]) + + tuesday = results[3] # 5/4 + self.assertEqual(0, tuesday["reached_to_step_count"]) + self.assertEqual(0, tuesday["reached_from_step_count"]) + self.assertEqual(0, tuesday["conversion_rate"]) + + wednesday = results[4] # 5/5 + self.assertEqual(0, wednesday["reached_to_step_count"]) + self.assertEqual(0, wednesday["reached_from_step_count"]) + self.assertEqual(0, wednesday["conversion_rate"]) + + thursday = results[5] # 5/6 + self.assertEqual(0, thursday["reached_to_step_count"]) + self.assertEqual(1, thursday["reached_from_step_count"]) + self.assertEqual(0, thursday["conversion_rate"]) + + friday = results[6] # 5/7 + self.assertEqual(0, friday["reached_to_step_count"]) + self.assertEqual(0, friday["reached_from_step_count"]) + self.assertEqual(0, friday["conversion_rate"]) + + def test_window_size_one_day(self): + self._create_sample_data() + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 00:00:00", + "funnel_window_interval": 1, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + saturday = results[0] # 5/1 + self.assertEqual(1, saturday["reached_to_step_count"]) + self.assertEqual(3, saturday["reached_from_step_count"]) + self.assertEqual(33.33, saturday["conversion_rate"]) + + sunday = results[1] # 5/2 + self.assertEqual(0, sunday["reached_to_step_count"]) + 
self.assertEqual(2, sunday["reached_from_step_count"]) + self.assertEqual(0, sunday["conversion_rate"]) + + monday = results[2] # 5/3 + self.assertEqual(0, monday["reached_to_step_count"]) + self.assertEqual(0, monday["reached_from_step_count"]) + self.assertEqual(0, monday["conversion_rate"]) + + tuesday = results[3] # 5/4 + self.assertEqual(0, tuesday["reached_to_step_count"]) + self.assertEqual(0, tuesday["reached_from_step_count"]) + self.assertEqual(0, tuesday["conversion_rate"]) + + wednesday = results[4] # 5/5 + self.assertEqual(0, wednesday["reached_to_step_count"]) + self.assertEqual(0, wednesday["reached_from_step_count"]) + self.assertEqual(0, wednesday["conversion_rate"]) + + thursday = results[5] # 5/6 + self.assertEqual(0, thursday["reached_to_step_count"]) + self.assertEqual(1, thursday["reached_from_step_count"]) + self.assertEqual(0, thursday["conversion_rate"]) + + friday = results[6] # 5/7 + self.assertEqual(0, friday["reached_to_step_count"]) + self.assertEqual(0, friday["reached_from_step_count"]) + self.assertEqual(0, friday["conversion_rate"]) + + def test_period_not_final(self): + now = datetime.now() + + journeys_for( + { + "user_eight": [ + {"event": "step one", "timestamp": now}, + {"event": "step two", "timestamp": now + timedelta(minutes=1)}, + {"event": "step three", "timestamp": now + timedelta(minutes=2)}, + ] + }, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": (now - timedelta(1)).strftime(FORMAT_TIME), + "date_to": now.strftime(FORMAT_TIME_DAY_END), + "funnel_window_days": 1, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + self.assertEqual(len(results), 2) + + day = results[0] # 
yesterday + self.assertEqual(day["reached_from_step_count"], 0) + self.assertEqual(day["reached_to_step_count"], 0) + self.assertEqual(day["conversion_rate"], 0) + self.assertEqual( + day["timestamp"].replace(tzinfo=ZoneInfo("UTC")), + (datetime(now.year, now.month, now.day) - timedelta(1)).replace(tzinfo=ZoneInfo("UTC")), + ) + + day = results[1] # today + self.assertEqual(day["reached_from_step_count"], 1) + self.assertEqual(day["reached_to_step_count"], 1) + self.assertEqual(day["conversion_rate"], 100) + self.assertEqual( + day["timestamp"].replace(tzinfo=ZoneInfo("UTC")), + datetime(now.year, now.month, now.day).replace(tzinfo=ZoneInfo("UTC")), + ) + + def test_two_runs_by_single_user_in_one_period(self): + journeys_for( + { + "user_one": [ + # 1st full run + {"event": "step one", "timestamp": datetime(2021, 5, 1, 0)}, + {"event": "step two", "timestamp": datetime(2021, 5, 1, 1)}, + {"event": "step three", "timestamp": datetime(2021, 5, 1, 2)}, + # 2nd full run + {"event": "step one", "timestamp": datetime(2021, 5, 1, 13)}, + {"event": "step two", "timestamp": datetime(2021, 5, 1, 14)}, + {"event": "step three", "timestamp": datetime(2021, 5, 1, 15)}, + ] + }, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-01 23:59:59", + "funnel_window_days": 1, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + self.assertEqual(len(results), 1) + + day = results[0] # 2021-05-01 + self.assertEqual(day["reached_from_step_count"], 1) + self.assertEqual(day["reached_to_step_count"], 1) + self.assertEqual(day["conversion_rate"], 100) + + def test_steps_performed_in_period_but_in_reverse(self): 
+ journeys_for( + { + "user_one": [ + {"event": "step three", "timestamp": datetime(2021, 5, 1, 1)}, + {"event": "step two", "timestamp": datetime(2021, 5, 1, 2)}, + {"event": "step one", "timestamp": datetime(2021, 5, 1, 3)}, + ] + }, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-01 23:59:59", + "funnel_window_days": 1, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + self.assertEqual(len(results), 1) + + day_1 = results[0] # 2021-05-01 + self.assertEqual(day_1["reached_from_step_count"], 1) + self.assertEqual(day_1["reached_to_step_count"], 0) + self.assertEqual(day_1["conversion_rate"], 0) + + def test_one_person_in_multiple_periods_and_windows(self): + journeys_for( + { + "user_one": [ + # 1st user's 1st complete run + {"event": "step one", "timestamp": datetime(2021, 5, 1, 1)}, + {"event": "step two", "timestamp": datetime(2021, 5, 1, 2)}, + {"event": "step three", "timestamp": datetime(2021, 5, 1, 3)}, + # 1st user's incomplete run + {"event": "step one", "timestamp": datetime(2021, 5, 3, 1)}, + {"event": "step two", "timestamp": datetime(2021, 5, 3, 2)}, + # 1st user's 2nd complete run + {"event": "step one", "timestamp": datetime(2021, 5, 4, 11)}, + {"event": "step two", "timestamp": datetime(2021, 5, 4, 12)}, + {"event": "step three", "timestamp": datetime(2021, 5, 4, 13)}, + ], + "user_two": [ + # 2nd user's incomplete run + {"event": "step one", "timestamp": datetime(2021, 5, 4, 18)} + ], + }, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 
00:00:00", + "date_to": "2021-05-04 23:59:59", + "funnel_window_interval": 1, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + self.assertEqual(len(results), 4) + + day_1 = results[0] # 2021-05-01 + self.assertEqual(day_1["reached_from_step_count"], 1) + self.assertEqual(day_1["reached_to_step_count"], 1) + self.assertEqual(day_1["conversion_rate"], 100) + + day_2 = results[1] # 2021-05-02 + self.assertEqual(day_2["reached_from_step_count"], 0) + self.assertEqual(day_2["reached_to_step_count"], 0) + self.assertEqual(day_2["conversion_rate"], 0) + + day_3 = results[2] # 2021-05-03 + self.assertEqual(day_3["reached_from_step_count"], 1) + self.assertEqual(day_3["reached_to_step_count"], 0) + self.assertEqual(day_3["conversion_rate"], 0) + + day_4 = results[3] # 2021-05-04 + self.assertEqual(day_4["reached_from_step_count"], 2) + self.assertEqual(day_4["reached_to_step_count"], 1) + self.assertEqual(day_4["conversion_rate"], 50) + + # 1 user who dropped off starting 2021-05-04 + funnel_trends_persons_existent_dropped_off_results = self._get_actors_at_step( + filters, "2021-05-04 00:00:00", True + ) + + self.assertEqual(len(funnel_trends_persons_existent_dropped_off_results), 1) + self.assertEqual( + [person["distinct_ids"] for person in funnel_trends_persons_existent_dropped_off_results], + [["user_two"]], + ) + + # 1 user who converted starting 2021-05-04 + funnel_trends_persons_existent_converted_results = self._get_actors_at_step( + filters, "2021-05-04 00:00:00", False + ) + + self.assertEqual(len(funnel_trends_persons_existent_converted_results), 1) + self.assertEqual( + [person["distinct_ids"] for person in funnel_trends_persons_existent_converted_results], + [["user_one"]], + ) + + def test_from_second_step(self): + 
journeys_for( + { + "user_one": [ + # 1st user's complete run - should fall into the 2021-05-01 bucket even though counting only from 2nd step + {"event": "step one", "timestamp": datetime(2021, 5, 1, 1)}, + {"event": "step two", "timestamp": datetime(2021, 5, 2, 2)}, + {"event": "step three", "timestamp": datetime(2021, 5, 2, 3)}, + ], + "user_two": [ + # 2nd user's incomplete run - should not count at all since not reaching 2nd step + {"event": "step one", "timestamp": datetime(2021, 5, 1, 1)} + ], + "user_three": [ + # 3rd user's incomplete run - should not count at all since reaching 2nd step BUT not the 1st one + {"event": "step two", "timestamp": datetime(2021, 5, 2, 2)}, + {"event": "step three", "timestamp": datetime(2021, 5, 2, 3)}, + ], + "user_four": [ + # 4th user's incomplete run - should fall into the 2021-05-02 bucket as entered but not completed + {"event": "step one", "timestamp": datetime(2021, 5, 2, 1)}, + {"event": "step two", "timestamp": datetime(2021, 5, 2, 2)}, + ], + }, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-02 23:59:59", + "funnel_window_days": 3, + "funnel_from_step": 1, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + self.assertEqual(len(results), 2) + + day_1 = results[0] # 2021-05-01 + self.assertEqual(day_1["reached_from_step_count"], 1) + self.assertEqual(day_1["reached_to_step_count"], 1) + self.assertEqual(day_1["conversion_rate"], 100) + + day_2 = results[1] # 2021-05-02 + self.assertEqual(day_2["reached_from_step_count"], 1) + self.assertEqual(day_2["reached_to_step_count"], 0) + self.assertEqual(day_2["conversion_rate"], 0) + + def 
test_to_second_step(self): + journeys_for( + { + "user_one": [ + # 1st user's complete run - should fall into the 2021-05-01 bucket + {"event": "step one", "timestamp": datetime(2021, 5, 1, 1)}, + {"event": "step two", "timestamp": datetime(2021, 5, 2, 2)}, + {"event": "step three", "timestamp": datetime(2021, 5, 2, 3)}, + ], + "user_two": [ + # 2nd user's incomplete run - should count as incomplete + {"event": "step one", "timestamp": datetime(2021, 5, 1, 1)} + ], + "user_three": [ + # 3rd user's incomplete run - should not count at all since reaching 2nd step BUT not the 1st one + {"event": "step two", "timestamp": datetime(2021, 5, 2, 2)}, + {"event": "step three", "timestamp": datetime(2021, 5, 2, 3)}, + ], + "user_four": [ + # 4th user's run ends at step two, the funnel_to_step target - should fall into the 2021-05-02 bucket as entered and completed + {"event": "step one", "timestamp": datetime(2021, 5, 2, 1)}, + {"event": "step two", "timestamp": datetime(2021, 5, 2, 2)}, + ], + }, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-02 23:59:59", + "funnel_window_days": 3, + "funnel_to_step": 1, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + self.assertEqual(len(results), 2) + + day_1 = results[0] # 2021-05-01 + self.assertEqual(day_1["reached_from_step_count"], 2) + self.assertEqual(day_1["reached_to_step_count"], 1) + self.assertEqual(day_1["conversion_rate"], 50) + + day_2 = results[1] # 2021-05-02 + self.assertEqual(day_2["reached_from_step_count"], 1) + self.assertEqual(day_2["reached_to_step_count"], 1) + self.assertEqual(day_2["conversion_rate"], 100) + + def 
test_one_person_in_multiple_periods_and_windows_in_unordered_funnel(self): + journeys_for( + { + "user_one": [ + # 1st user's 1st complete run + {"event": "step one", "timestamp": datetime(2021, 5, 1, 1)}, + {"event": "step three", "timestamp": datetime(2021, 5, 1, 2)}, + {"event": "step two", "timestamp": datetime(2021, 5, 1, 3)}, + # 1st user's incomplete run + {"event": "step two", "timestamp": datetime(2021, 5, 3, 1)}, + {"event": "step one", "timestamp": datetime(2021, 5, 3, 2)}, + # 1st user's 2nd complete run + {"event": "step three", "timestamp": datetime(2021, 5, 4, 11)}, + {"event": "step one", "timestamp": datetime(2021, 5, 4, 12)}, + {"event": "step two", "timestamp": datetime(2021, 5, 4, 13)}, + ], + "user_two": [ + # 2nd user's incomplete run + {"event": "step one", "timestamp": datetime(2021, 5, 4, 18)} + ], + }, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-04 23:59:59", + "funnel_window_interval": 1, + "funnel_order_type": FunnelOrderType.UNORDERED, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + self.assertEqual(len(results), 4) + + day_1 = results[0] # 2021-05-01 + self.assertEqual(day_1["reached_from_step_count"], 1) + self.assertEqual(day_1["reached_to_step_count"], 1) + self.assertEqual(day_1["conversion_rate"], 100) + + day_2 = results[1] # 2021-05-02 + self.assertEqual(day_2["reached_from_step_count"], 0) + self.assertEqual(day_2["reached_to_step_count"], 0) + self.assertEqual(day_2["conversion_rate"], 0) + + day_3 = results[2] # 2021-05-03 + self.assertEqual(day_3["reached_from_step_count"], 1) + self.assertEqual(day_3["reached_to_step_count"], 0) + 
self.assertEqual(day_3["conversion_rate"], 0) + + day_4 = results[3] # 2021-05-04 + self.assertEqual(day_4["reached_from_step_count"], 2) + self.assertEqual(day_4["reached_to_step_count"], 1) + self.assertEqual(day_4["conversion_rate"], 50) + + # 1 user who dropped off starting # 2021-05-04 + funnel_trends_persons_existent_dropped_off_results = self._get_actors_at_step( + filters, "2021-05-04 00:00:00", True + ) + + self.assertEqual(len(funnel_trends_persons_existent_dropped_off_results), 1) + self.assertEqual( + [person["distinct_ids"] for person in funnel_trends_persons_existent_dropped_off_results], + [["user_two"]], + ) + + # 1 user who converted starting # 2021-05-04 + funnel_trends_persons_existent_dropped_off_results = self._get_actors_at_step( + filters, "2021-05-04 00:00:00", False + ) + + self.assertEqual(len(funnel_trends_persons_existent_dropped_off_results), 1) + self.assertEqual( + [person["distinct_ids"] for person in funnel_trends_persons_existent_dropped_off_results], + [["user_one"]], + ) + + def test_one_person_in_multiple_periods_and_windows_in_strict_funnel(self): + journeys_for( + { + "user_one": [ + # 1st user's 1st complete run + {"event": "step one", "timestamp": datetime(2021, 5, 1, 1)}, + {"event": "step two", "timestamp": datetime(2021, 5, 1, 2)}, + {"event": "step three", "timestamp": datetime(2021, 5, 1, 3)}, + # 1st user's incomplete run + {"event": "step one", "timestamp": datetime(2021, 5, 3, 1)}, + {"event": "step two", "timestamp": datetime(2021, 5, 3, 2)}, + # broken because strict + {"event": "blah", "timestamp": datetime(2021, 5, 3, 2, 30)}, + {"event": "step three", "timestamp": datetime(2021, 5, 3, 3)}, + # 1st user's 2nd complete run + {"event": "step one", "timestamp": datetime(2021, 5, 4, 11)}, + {"event": "step two", "timestamp": datetime(2021, 5, 4, 12)}, + {"event": "step three", "timestamp": datetime(2021, 5, 4, 13)}, + ], + "user_two": [ + # 2nd user's incomplete run + {"event": "step one", "timestamp": datetime(2021, 
5, 4, 18)}, + # broken because strict + {"event": "blah", "timestamp": datetime(2021, 5, 4, 18, 20)}, + {"event": "step two", "timestamp": datetime(2021, 5, 4, 19)}, + ], + }, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-04 23:59:59", + "funnel_order_type": FunnelOrderType.STRICT, + "funnel_window_days": 1, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + self.assertEqual(len(results), 4) + + day_1 = results[0] # 2021-05-01 + self.assertEqual(day_1["reached_from_step_count"], 1) + self.assertEqual(day_1["reached_to_step_count"], 1) + self.assertEqual(day_1["conversion_rate"], 100) + + day_2 = results[1] # 2021-05-02 + self.assertEqual(day_2["reached_from_step_count"], 0) + self.assertEqual(day_2["reached_to_step_count"], 0) + self.assertEqual(day_2["conversion_rate"], 0) + + day_3 = results[2] # 2021-05-03 + self.assertEqual(day_3["reached_from_step_count"], 1) + self.assertEqual(day_3["reached_to_step_count"], 0) + self.assertEqual(day_3["conversion_rate"], 0) + + day_4 = results[3] # 2021-05-04 + self.assertEqual(day_4["reached_from_step_count"], 2) + self.assertEqual(day_4["reached_to_step_count"], 1) + self.assertEqual(day_4["conversion_rate"], 50) + + def test_funnel_step_breakdown_event(self): + journeys_for( + { + "user_one": [ + { + "event": "step one", + "timestamp": datetime(2021, 5, 1), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "step two", + "timestamp": datetime(2021, 5, 3), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "step three", + "timestamp": datetime(2021, 5, 5), + "properties": {"$browser": "Chrome"}, + }, + ], + 
"user_two": [ + { + "event": "step one", + "timestamp": datetime(2021, 5, 2), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "step two", + "timestamp": datetime(2021, 5, 3), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "step three", + "timestamp": datetime(2021, 5, 5), + "properties": {"$browser": "Chrome"}, + }, + ], + "user_three": [ + { + "event": "step one", + "timestamp": datetime(2021, 5, 3), + "properties": {"$browser": "Safari"}, + }, + { + "event": "step two", + "timestamp": datetime(2021, 5, 4), + "properties": {"$browser": "Safari"}, + }, + { + "event": "step three", + "timestamp": datetime(2021, 5, 5), + "properties": {"$browser": "Safari"}, + }, + ], + }, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-13 23:59:59", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + "breakdown_type": "event", + "breakdown": "$browser", + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(len(results), 2) + + for res in results: + if res["breakdown_value"] == ["Chrome"]: + self.assertEqual( + res["data"], + [ + 100.0, + 100.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ], + ) + elif res["breakdown_value"] == ["Safari"]: + self.assertEqual( + res["data"], + [0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ) + else: + self.fail(msg="Invalid breakdown value") + + def test_funnel_step_breakdown_person(self): + _create_person(distinct_ids=["user_one"], team=self.team, properties={"$browser": "Chrome"}) + _create_person(distinct_ids=["user_two"], team=self.team, properties={"$browser": "Chrome"}) + _create_person( + 
distinct_ids=["user_three"], + team=self.team, + properties={"$browser": "Safari"}, + ) + journeys_for( + { + "user_one": [ + {"event": "step one", "timestamp": datetime(2021, 5, 1)}, + {"event": "step two", "timestamp": datetime(2021, 5, 3)}, + {"event": "step three", "timestamp": datetime(2021, 5, 5)}, + ], + "user_two": [ + {"event": "step one", "timestamp": datetime(2021, 5, 2)}, + {"event": "step two", "timestamp": datetime(2021, 5, 3)}, + {"event": "step three", "timestamp": datetime(2021, 5, 5)}, + ], + "user_three": [ + {"event": "step one", "timestamp": datetime(2021, 5, 3)}, + {"event": "step two", "timestamp": datetime(2021, 5, 4)}, + {"event": "step three", "timestamp": datetime(2021, 5, 5)}, + ], + }, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-13 23:59:59", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + "breakdown_type": "person", + "breakdown": "$browser", + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(len(results), 2) + + for res in results: + if res["breakdown_value"] == ["Chrome"]: + self.assertEqual( + res["data"], + [ + 100.0, + 100.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ], + ) + elif res["breakdown_value"] == ["Safari"]: + self.assertEqual( + res["data"], + [0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ) + else: + self.fail(msg="Invalid breakdown value") + + def test_funnel_trend_cohort_breakdown(self): + _create_person(distinct_ids=["user_one"], team=self.team, properties={"key": "value"}) + _create_person(distinct_ids=["user_two"], team=self.team, properties={"key": "value"}) + _create_person( + 
distinct_ids=["user_three"], + team=self.team, + properties={"$browser": "Safari"}, + ) + + journeys_for( + { + "user_one": [ + {"event": "step one", "timestamp": datetime(2021, 5, 1)}, + {"event": "step two", "timestamp": datetime(2021, 5, 3)}, + {"event": "step three", "timestamp": datetime(2021, 5, 5)}, + ], + "user_two": [ + {"event": "step one", "timestamp": datetime(2021, 5, 2)}, + {"event": "step two", "timestamp": datetime(2021, 5, 3)}, + {"event": "step three", "timestamp": datetime(2021, 5, 5)}, + ], + "user_three": [ + {"event": "step one", "timestamp": datetime(2021, 5, 3)}, + {"event": "step two", "timestamp": datetime(2021, 5, 4)}, + {"event": "step three", "timestamp": datetime(2021, 5, 5)}, + ], + }, + self.team, + ) + + cohort = Cohort.objects.create( + team=self.team, + name="test_cohort", + groups=[{"properties": [{"key": "key", "value": "value", "type": "person"}]}], + ) + cohort.calculate_people_ch(pending_version=0) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-13 23:59:59", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + "breakdown_type": "cohort", + "breakdown": [cohort.pk], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(len(results), 1) + self.assertEqual( + results[0]["data"], + [100.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ) + + @snapshot_clickhouse_queries + def test_timezones_trends(self): + journeys_for( + { + "user_one": [ + { + "event": "step one", + "timestamp": datetime(2021, 5, 1, 10), + }, # 04-30 in pacific + { + "event": "step two", + "timestamp": datetime(2021, 5, 1, 11), + }, # today in pacific + { + "event": "step three", + "timestamp": 
datetime(2021, 5, 1, 12), + }, # today in pacific + ], + "user_two": [ + { + "event": "step one", + "timestamp": datetime(2021, 5, 1, 1), + }, # 04-30 in pacific + { + "event": "step two", + "timestamp": datetime(2021, 5, 1, 2), + }, # 04-30 in pacific + { + "event": "step three", + "timestamp": datetime(2021, 5, 1, 3), + }, # 04-30 in pacific + ], + "user_three": [ + { + "event": "step one", + "timestamp": datetime(2021, 5, 1, 1), + }, # 04-30 in pacific + { + "event": "step two", + "timestamp": datetime(2021, 5, 1, 10), + }, # today in pacific + { + "event": "step three", + "timestamp": datetime(2021, 5, 1, 11), + }, # today in pacific + ], + "user_eight": [], + }, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-04-30 00:00:00", + "date_to": "2021-05-07 00:00:00", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + self.team.timezone = "US/Pacific" + self.team.save() + + results_pacific = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + + saturday = results[1] # 5/1 + self.assertEqual(3, saturday["reached_to_step_count"]) + self.assertEqual(3, saturday["reached_from_step_count"]) + self.assertEqual(100.0, saturday["conversion_rate"]) + + friday_pacific = results_pacific[0] + self.assertEqual(2, friday_pacific["reached_to_step_count"]) + self.assertEqual(2, friday_pacific["reached_from_step_count"]) + self.assertEqual(100.0, friday_pacific["conversion_rate"]) + saturday_pacific = results_pacific[1] + self.assertEqual(1, saturday_pacific["reached_to_step_count"]) + self.assertEqual(1, saturday_pacific["reached_from_step_count"]) + + def 
test_trend_for_hour_based_conversion_window(self): + journeys_for( + { + "user_one": [ + # Converts in 2 hours + {"event": "step one", "timestamp": datetime(2021, 5, 1, 10)}, + {"event": "step two", "timestamp": datetime(2021, 5, 1, 11)}, + {"event": "step three", "timestamp": datetime(2021, 5, 1, 12)}, + ], + "user_two": [ + # Converts in 4 hours (not fast enough) + {"event": "step one", "timestamp": datetime(2021, 5, 1, 10)}, + {"event": "step two", "timestamp": datetime(2021, 5, 1, 11)}, + {"event": "step three", "timestamp": datetime(2021, 5, 1, 14)}, + ], + }, + self.team, + ) + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "funnel_window_interval": 3, + "funnel_window_interval_unit": "hour", + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + + with freeze_time("2021-05-06T23:40:59Z"): + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results + conversion_rates = [row["conversion_rate"] for row in results] + self.assertEqual(conversion_rates, [50.0, 0.0, 0.0, 0.0, 0.0, 0.0]) diff --git a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr index 1320f6403b5441..ef3b23794866d8 100644 --- a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr +++ b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr @@ -79,7 +79,7 @@ WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'UTC'), 
plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(1))), ifNull(in(person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 5)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 4)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0), equals(events.event, '$pageview')) GROUP BY person_id) diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index b1886ee011705d..83077cb0a40333 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -85,7 +85,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(e__pdi__person.`properties___$bool_prop`, 'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 6)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 5)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY day_start) @@ -172,7 +172,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$bool_prop'), ''), 'null'), '^"|"$', ''), 'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 7)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 6)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY day_start) @@ -688,7 +688,7 @@ WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 26)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 25)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)))) GROUP BY value @@ -757,7 +757,7 @@ WHERE and(equals(e.team_id, 2), and(and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 26)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 25)) GROUP BY cohortpeople.person_id, 
cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'val'], ['$$_posthog_breakdown_other_$$', 'val'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id, @@ -1592,7 +1592,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 39)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 38)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value @@ -1640,7 +1640,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT 
cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 39)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 38)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start, @@ -1691,7 +1691,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 40)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 39)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value @@ -1738,7 +1738,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), 
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 40)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 39)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start, diff --git a/posthog/hogql_queries/insights/utils/utils.py b/posthog/hogql_queries/insights/utils/utils.py new file mode 100644 index 00000000000000..c3b99c6a3b625d --- /dev/null +++ b/posthog/hogql_queries/insights/utils/utils.py @@ -0,0 +1,12 @@ +from typing import List, Optional +from posthog.hogql import ast +from posthog.models.team.team import Team, WeekStartDay +from posthog.queries.util import get_trunc_func_ch + + +def get_start_of_interval_hogql(interval: str, *, team: Team, source: Optional[ast.Expr] = None) -> ast.Expr: + trunc_func = get_trunc_func_ch(interval) + trunc_func_args: 
List[ast.Expr] = [source] if source else [ast.Field(chain=["timestamp"])] + if trunc_func == "toStartOfWeek": + trunc_func_args.append(ast.Constant(value=int((WeekStartDay(team.week_start_day or 0)).clickhouse_mode))) + return ast.Call(name=trunc_func, args=trunc_func_args)