From ec59e6f43d6a5550548db1cf7ce2b1e018a3e854 Mon Sep 17 00:00:00 2001 From: timgl Date: Fri, 14 Jun 2024 10:44:58 +0100 Subject: [PATCH] perf: Remove extra breakdown query for trends (#22885) * perf: Remove extra breakdown query for trends * fix * fix actor breakdown * fix breakdown tests * fix * remove most breakdown_values_override calls * fix tests * fix baseline * remove unnecessary test * fix histogram * fix: Don't allow window function with distinct as it's wrong * fix tests * fix * Update query snapshots * fix histogram --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- mypy-baseline.txt | 22 +- posthog/hogql/functions/mapping.py | 1 + posthog/hogql/printer.py | 5 +- .../insights/trends/breakdown.py | 222 +- .../insights/trends/breakdown_values.py | 279 --- .../test/__snapshots__/test_trends.ambr | 2052 +++++++++++------ .../test_trends_data_warehouse_query.ambr | 96 +- .../insights/trends/test/test_trends.py | 10 +- .../trends/test/test_trends_persons.py | 1 - .../trends/test/test_trends_query_runner.py | 25 +- .../trends/trends_actors_query_builder.py | 3 +- .../insights/trends/trends_query_builder.py | 119 +- .../insights/trends/trends_query_runner.py | 103 +- posthog/queries/trends/breakdown.py | 5 +- .../trends/test/test_paging_breakdowns.py | 3 +- 15 files changed, 1621 insertions(+), 1325 deletions(-) delete mode 100644 posthog/hogql_queries/insights/trends/breakdown_values.py diff --git a/mypy-baseline.txt b/mypy-baseline.txt index 300c901c82196..00e42297ced4f 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -152,6 +152,15 @@ ee/billing/quota_limiting.py:0: error: "object" has no attribute "get" [attr-de ee/billing/quota_limiting.py:0: error: Unsupported target for indexed assignment ("object") [index] ee/billing/quota_limiting.py:0: error: Unsupported target for indexed assignment ("object") [index] posthog/tasks/email.py:0: error: Module "django.utils.timezone" does not explicitly export attribute "timedelta" [attr-defined] +posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_histogram_bin_count" [union-attr] +posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr] +posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr] +posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown" [union-attr] +posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument 1 to "parse_expr" has incompatible type "str | float | list[str | float] | Any | None"; expected "str" [arg-type] +posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr] +posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown" [union-attr] +posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument "breakdown_field" to "get_properties_chain" has incompatible type "str | float | list[str | float] | Any | None"; expected "str" [arg-type] +posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_group_type_index" [union-attr] posthog/hogql_queries/insights/trends/aggregation_operations.py:0: error: List item 1 has incompatible type "str | None"; expected "str" [list-item] posthog/hogql_queries/insights/trends/aggregation_operations.py:0: error: Argument "chain" to "Field" has incompatible type "list[str]"; expected "list[str | int]" [arg-type] posthog/hogql_queries/insights/trends/aggregation_operations.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance @@ -298,19 +307,6 @@ posthog/hogql/query.py:0: error: Subclass of "SelectQuery" and "SelectUnionQuery posthog/queries/person_query.py:0: error: Incompatible type for lookup 'pk': (got "str | int | list[str]", expected "str | int") [misc] posthog/queries/event_query/event_query.py:0: error: Incompatible type for lookup 'pk': (got "str | int | list[str]", expected "str | int") [misc] posthog/hogql_queries/sessions_timeline_query_runner.py:0: error: Statement is unreachable [unreachable] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_histogram_bin_count" [union-attr] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument "exprs" to "Or" has incompatible type "list[CompareOperation]"; expected "list[Expr]" [arg-type] -posthog/hogql_queries/insights/trends/breakdown.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance -posthog/hogql_queries/insights/trends/breakdown.py:0: note: Consider using "Sequence" instead, which is covariant -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown" [union-attr] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument 1 to "parse_expr" has incompatible type "str | float | list[str | float] | Any | None"; expected "str" [arg-type] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown" [union-attr] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument "breakdown_field" to "get_properties_chain" has incompatible type "str | float | list[str | float] | Any | None"; expected "str" [arg-type] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_group_type_index" [union-attr] posthog/hogql_queries/hogql_query_runner.py:0: error: Statement is unreachable [unreachable] posthog/hogql_queries/hogql_query_runner.py:0: error: Incompatible return value type (got "SelectQuery | SelectUnionQuery", expected "SelectQuery") [return-value] posthog/hogql_queries/events_query_runner.py:0: error: Statement is unreachable [unreachable] diff --git a/posthog/hogql/functions/mapping.py b/posthog/hogql/functions/mapping.py index bda37830c4b37..d079b7878534c 100644 --- a/posthog/hogql/functions/mapping.py +++ b/posthog/hogql/functions/mapping.py @@ -466,6 +466,7 @@ def compare_types(arg_types: list[ConstantType], sig_arg_types: tuple[ConstantTy "timeStampSub": HogQLFunctionMeta("timeStampSub", 2, 2), "now": HogQLFunctionMeta("now64", 0, 1, tz_aware=True, case_sensitive=False), "nowInBlock": HogQLFunctionMeta("nowInBlock", 1, 1), + "rowNumberInAllBlocks": HogQLFunctionMeta("rowNumberInAllBlocks", 0, 0), "today": HogQLFunctionMeta("today"), "yesterday": HogQLFunctionMeta("yesterday"), "timeSlot": HogQLFunctionMeta("timeSlot", 1, 1), diff --git a/posthog/hogql/printer.py b/posthog/hogql/printer.py index b44191b41095e..136e8ecab70fd 100644 --- a/posthog/hogql/printer.py +++ b/posthog/hogql/printer.py @@ -1145,7 +1145,10 @@ def visit_window_function(self, node: ast.WindowFunction): identifier = self._print_identifier(node.name) exprs = ", ".join(self.visit(expr) for expr in node.exprs or []) args = "(" + (", ".join(self.visit(arg) for arg in node.args or [])) + ")" if node.args else "" - over = f"({self.visit(node.over_expr)})" if node.over_expr else self._print_identifier(node.over_identifier) + if node.over_expr or node.over_identifier: + over = f"({self.visit(node.over_expr)})" if node.over_expr else self._print_identifier(node.over_identifier) + else: + over = "()" return f"{identifier}({exprs}){args} OVER {over}" def visit_window_frame_expr(self, node: ast.WindowFrameExpr): diff --git a/posthog/hogql_queries/insights/trends/breakdown.py b/posthog/hogql_queries/insights/trends/breakdown.py index 49491429cf54f..9c8af0064ba42 100644 --- a/posthog/hogql_queries/insights/trends/breakdown.py +++ b/posthog/hogql_queries/insights/trends/breakdown.py @@ -3,12 +3,6 @@ from posthog.hogql.constants import LimitContext from posthog.hogql.parser import parse_expr from posthog.hogql.timings import HogQLTimings -from posthog.hogql_queries.insights.trends.breakdown_values import ( - BREAKDOWN_NULL_STRING_LABEL, - BREAKDOWN_OTHER_STRING_LABEL, - BreakdownValues, -) -from posthog.hogql_queries.insights.trends.display import TrendsDisplay from posthog.hogql_queries.insights.trends.utils import ( get_properties_chain, ) @@ -17,6 +11,11 @@ from posthog.models.team.team import Team from posthog.schema import ActionsNode, EventsNode, DataWarehouseNode, HogQLQueryModifiers, InCohortVia, TrendsQuery +BREAKDOWN_OTHER_STRING_LABEL = "$$_posthog_breakdown_other_$$" +BREAKDOWN_NULL_STRING_LABEL = "$$_posthog_breakdown_null_$$" +BREAKDOWN_OTHER_DISPLAY = "Other (i.e. all remaining values)" +BREAKDOWN_NULL_DISPLAY = "None (i.e. no value)" + def hogql_to_string(expr: ast.Expr) -> ast.Call: return ast.Call(name="toString", args=[expr]) @@ -30,7 +29,6 @@ class Breakdown: timings: HogQLTimings modifiers: HogQLQueryModifiers events_filter: ast.Expr - breakdown_values_override: Optional[list[str | int]] limit_context: LimitContext def __init__( @@ -42,7 +40,6 @@ def __init__( timings: HogQLTimings, modifiers: HogQLQueryModifiers, events_filter: ast.Expr, - breakdown_values_override: Optional[list[str | int]] = None, limit_context: LimitContext = LimitContext.QUERY, ): self.team = team @@ -52,34 +49,19 @@ def __init__( self.timings = timings self.modifiers = modifiers self.events_filter = events_filter - self.breakdown_values_override = breakdown_values_override self.limit_context = limit_context @cached_property def enabled(self) -> bool: - return ( - self.query.breakdownFilter is not None - and self.query.breakdownFilter.breakdown is not None - and self.has_breakdown_values - ) - - @cached_property - def is_session_type(self) -> bool: - return self.enabled and self.query.breakdownFilter.breakdown_type == "session" + return self.query.breakdownFilter is not None and self.query.breakdownFilter.breakdown is not None @cached_property def is_histogram_breakdown(self) -> bool: return self.enabled and self.query.breakdownFilter.breakdown_histogram_bin_count is not None - def placeholders(self) -> dict[str, ast.Expr]: - values = self._breakdown_buckets_ast if self.is_histogram_breakdown else self._breakdown_values_ast - - return {"cross_join_breakdown_values": ast.Alias(alias="breakdown_value", expr=values)} - def column_expr(self) -> ast.Alias: if self.is_histogram_breakdown: - return ast.Alias(alias="breakdown_value", expr=self._get_breakdown_histogram_multi_if()) - + return ast.Alias(alias="breakdown_value", expr=ast.Field(chain=self._properties_chain)) if self.query.breakdownFilter.breakdown_type == "cohort": if self.modifiers.inCohortVia == InCohortVia.LEFTJOIN_CONJOINED: return ast.Alias( @@ -95,19 +77,15 @@ def column_expr(self) -> ast.Alias: expr=hogql_to_string(ast.Constant(value=cohort_breakdown)), ) - return ast.Alias(alias="breakdown_value", expr=self._get_breakdown_transform_func) + return ast.Alias(alias="breakdown_value", expr=self._get_breakdown_expression) - def events_where_filter(self) -> ast.Expr | None: + def events_where_filter(self, breakdown_values_override: Optional[str | int] = None) -> ast.Expr | None: if ( self.query.breakdownFilter is not None and self.query.breakdownFilter.breakdown is not None and self.query.breakdownFilter.breakdown_type == "cohort" ): - breakdown = ( - self.breakdown_values_override - if self.breakdown_values_override - else self.query.breakdownFilter.breakdown - ) + breakdown = breakdown_values_override if breakdown_values_override else self.query.breakdownFilter.breakdown if breakdown == "all": return None @@ -136,60 +114,33 @@ def events_where_filter(self) -> ast.Expr | None: right=ast.Constant(value=breakdown), ) - # No need to filter if we're showing the "other" bucket, as we need to look at all events anyway. - # Except when explicitly filtering - if ( - self.query.breakdownFilter is not None - and not self.query.breakdownFilter.breakdown_hide_other_aggregation - and len(self.breakdown_values_override or []) == 0 - ): - return ast.Constant(value=True) - - if ( - self.query.breakdownFilter is not None - and self.query.breakdownFilter.breakdown is not None - and self.query.breakdownFilter.breakdown_type == "hogql" - and isinstance(self.query.breakdownFilter.breakdown, str) - ): - left = parse_expr(self.query.breakdownFilter.breakdown) - else: - left = ast.Field(chain=self._properties_chain) - - if not self.is_histogram_breakdown: - left = hogql_to_string(left) - - compare_ops = [] - for _value in self._breakdown_values: - value: Optional[str] = str(_value) # non-cohorts are always strings - # If the value is one of the "other" values, then use the `transform()` func + if breakdown_values_override: + if ( + self.query.breakdownFilter is not None + and self.query.breakdownFilter.breakdown is not None + and self.query.breakdownFilter.breakdown_type == "hogql" + and isinstance(self.query.breakdownFilter.breakdown, str) + ): + left = parse_expr(self.query.breakdownFilter.breakdown) + else: + left = ast.Field(chain=self._properties_chain) + value: Optional[str] = str(breakdown_values_override) # non-cohorts are always strings if value == BREAKDOWN_OTHER_STRING_LABEL: - transform_func = self._get_breakdown_transform_func - compare_ops.append( - ast.CompareOperation( - left=transform_func, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=value) - ) - ) + # TODO: Fix breaking down by other + return ast.Constant(value=True) elif value == BREAKDOWN_NULL_STRING_LABEL: - compare_ops.append( - ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=None)) - ) - compare_ops.append( - ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value="")) + return ast.Or( + exprs=[ + ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=None)), + ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value="")), + ] ) else: - compare_ops.append( - ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=value)) - ) - - if len(compare_ops) == 1: - return compare_ops[0] - elif len(compare_ops) == 0: - return parse_expr("1 = 1") - - return ast.Or(exprs=compare_ops) + return ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=value)) + return ast.Constant(value=True) @cached_property - def _get_breakdown_transform_func(self) -> ast.Call: + def _get_breakdown_expression(self) -> ast.Call: if self.query.breakdownFilter.breakdown_type == "hogql": return self._get_breakdown_values_transform(parse_expr(self.query.breakdownFilter.breakdown)) return self._get_breakdown_values_transform(ast.Field(chain=self._properties_chain)) @@ -202,115 +153,14 @@ def _get_breakdown_values_transform(self, node: ast.Expr) -> ast.Call: return cast( ast.Call, parse_expr( - "transform(ifNull(nullIf(toString({node}), ''), {nil}), {values}, {values}, {other})", + "ifNull(nullIf(toString({node}), ''), {nil})", placeholders={ "node": node, - "values": self._breakdown_values_ast, "nil": ast.Constant(value=BREAKDOWN_NULL_STRING_LABEL), - "other": ast.Constant(value=BREAKDOWN_OTHER_STRING_LABEL), }, ), ) - @cached_property - def _breakdown_buckets_ast(self) -> ast.Array: - buckets = self._get_breakdown_histogram_buckets() - values = [f"[{t[0]},{t[1]}]" for t in buckets] - # TODO: add this only if needed - values.append('["",""]') - - return ast.Array(exprs=[ast.Constant(value=v) for v in values]) - - @property - def _breakdown_values_ast(self) -> ast.Array: - exprs: list[ast.Expr] = [] - for value in self._breakdown_values: - if isinstance(value, str): - exprs.append(ast.Constant(value=value)) - else: - exprs.append(hogql_to_string(ast.Constant(value=value))) - return ast.Array(exprs=exprs) - - @cached_property - def _all_breakdown_values(self) -> list[str | int | None]: - # Used in the actors query - if self.breakdown_values_override is not None: - return cast(list[str | int | None], self.breakdown_values_override) - - if self.query.breakdownFilter is None: - return [] - - with self.timings.measure("breakdown_values_query"): - breakdown = BreakdownValues( - team=self.team, - series=self.series, - events_filter=self.events_filter, - chart_display_type=self._trends_display().display_type, - breakdown_filter=self.query.breakdownFilter, - query_date_range=self.query_date_range, - modifiers=self.modifiers, - limit_context=self.limit_context, - ) - return cast(list[str | int | None], breakdown.get_breakdown_values()) - - @cached_property - def _breakdown_values(self) -> list[str | int]: - values = [BREAKDOWN_NULL_STRING_LABEL if v is None else v for v in self._all_breakdown_values] - return cast(list[str | int], values) - - @cached_property - def has_breakdown_values(self) -> bool: - return len(self._breakdown_values) > 0 - - def _get_breakdown_histogram_buckets(self) -> list[tuple[float, float]]: - buckets = [] - values = self._breakdown_values - - if len(values) == 1: - values = [values[0], values[0]] - - for i in range(len(values) - 1): - last_value = i == len(values) - 2 - - # Since we always `floor(x, 2)` the value, we add 0.01 to the last bucket - # to ensure it's always slightly greater than the maximum value - lower_bound = float(values[i]) - upper_bound = float(values[i + 1]) + 0.01 if last_value else float(values[i + 1]) - buckets.append((lower_bound, upper_bound)) - - return buckets - - def _get_breakdown_histogram_multi_if(self) -> ast.Expr: - multi_if_exprs: list[ast.Expr] = [] - - buckets = self._get_breakdown_histogram_buckets() - - for lower_bound, upper_bound in buckets: - multi_if_exprs.extend( - [ - ast.And( - exprs=[ - ast.CompareOperation( - left=ast.Field(chain=self._properties_chain), - op=ast.CompareOperationOp.GtEq, - right=ast.Constant(value=lower_bound), - ), - ast.CompareOperation( - left=ast.Field(chain=self._properties_chain), - op=ast.CompareOperationOp.Lt, - right=ast.Constant(value=upper_bound), - ), - ] - ), - ast.Constant(value=f"[{lower_bound},{upper_bound}]"), - ] - ) - - # `else` block of the multi-if - multi_if_exprs.append(ast.Constant(value='["",""]')) - - return ast.Call(name="multiIf", args=multi_if_exprs) - @cached_property def _properties_chain(self): return get_properties_chain( @@ -318,11 +168,3 @@ def _properties_chain(self): breakdown_field=self.query.breakdownFilter.breakdown, group_type_index=self.query.breakdownFilter.breakdown_group_type_index, ) - - def _trends_display(self) -> TrendsDisplay: - display = ( - self.query.trendsFilter.display - if self.query.trendsFilter is not None and self.query.trendsFilter.display is not None - else None - ) - return TrendsDisplay(display) diff --git a/posthog/hogql_queries/insights/trends/breakdown_values.py b/posthog/hogql_queries/insights/trends/breakdown_values.py deleted file mode 100644 index aee02dd9ccefb..0000000000000 --- a/posthog/hogql_queries/insights/trends/breakdown_values.py +++ /dev/null @@ -1,279 +0,0 @@ -from typing import Optional, Union, Any -from posthog.hogql import ast -from posthog.hogql.constants import LimitContext, get_breakdown_limit_for_context, BREAKDOWN_VALUES_LIMIT_FOR_COUNTRIES -from posthog.hogql.parser import parse_expr, parse_select -from posthog.hogql.placeholders import replace_placeholders, find_placeholders -from posthog.hogql.query import execute_hogql_query -from posthog.hogql_queries.insights.trends.aggregation_operations import AggregationOperations -from posthog.hogql_queries.insights.trends.utils import get_properties_chain -from posthog.hogql_queries.utils.query_date_range import QueryDateRange -from posthog.models.team.team import Team -from posthog.schema import ( - BreakdownFilter, - BreakdownType, - ChartDisplayType, - ActionsNode, - EventsNode, - DataWarehouseNode, - HogQLQueryModifiers, -) -from functools import cached_property - -BREAKDOWN_OTHER_STRING_LABEL = "$$_posthog_breakdown_other_$$" -BREAKDOWN_OTHER_NUMERIC_LABEL = 9007199254740991 # pow(2, 53) - 1, for JS compatibility -BREAKDOWN_OTHER_DISPLAY = "Other (i.e. all remaining values)" -BREAKDOWN_NULL_STRING_LABEL = "$$_posthog_breakdown_null_$$" -BREAKDOWN_NULL_NUMERIC_LABEL = 9007199254740990 # pow(2, 53) - 2, for JS compatibility -BREAKDOWN_NULL_DISPLAY = "None (i.e. no value)" - - -class BreakdownValues: - team: Team - series: Union[EventsNode, ActionsNode, DataWarehouseNode] - breakdown_field: Union[str, float, list[Union[str, float]]] - breakdown_type: BreakdownType - events_filter: ast.Expr - chart_display_type: ChartDisplayType - histogram_bin_count: Optional[int] - group_type_index: Optional[int] - hide_other_aggregation: Optional[bool] - normalize_url: Optional[bool] - breakdown_limit: int - query_date_range: QueryDateRange - modifiers: HogQLQueryModifiers - limit_context: LimitContext - - def __init__( - self, - team: Team, - series: Union[EventsNode, ActionsNode, DataWarehouseNode], - events_filter: ast.Expr, - chart_display_type: ChartDisplayType, - breakdown_filter: BreakdownFilter, - query_date_range: QueryDateRange, - modifiers: HogQLQueryModifiers, - limit_context: LimitContext = LimitContext.QUERY, - ): - self.team = team - self.series = series - self.breakdown_field = breakdown_filter.breakdown # type: ignore - self.breakdown_type = breakdown_filter.breakdown_type # type: ignore - self.events_filter = events_filter - self.chart_display_type = chart_display_type - self.histogram_bin_count = ( - int(breakdown_filter.breakdown_histogram_bin_count) - if breakdown_filter.breakdown_histogram_bin_count is not None - else None - ) - self.group_type_index = ( - int(breakdown_filter.breakdown_group_type_index) - if breakdown_filter.breakdown_group_type_index is not None - else None - ) - self.hide_other_aggregation = breakdown_filter.breakdown_hide_other_aggregation - self.normalize_url = breakdown_filter.breakdown_normalize_url - self.breakdown_limit = breakdown_filter.breakdown_limit or get_breakdown_limit_for_context(limit_context) - self.query_date_range = query_date_range - self.modifiers = modifiers - self.limit_context = limit_context - - def get_breakdown_values(self) -> list[str | int]: - if self.breakdown_type == "cohort": - if self.breakdown_field == "all": - return [0] - - if isinstance(self.breakdown_field, list): - return [value if isinstance(value, str) else int(value) for value in self.breakdown_field] - - return [self.breakdown_field if isinstance(self.breakdown_field, str) else int(self.breakdown_field)] - - if self.breakdown_type == "hogql": - select_field = ast.Alias( - alias="value", - expr=parse_expr(str(self.breakdown_field)), - ) - else: - select_field = ast.Alias( - alias="value", - expr=ast.Field( - chain=get_properties_chain( - breakdown_type=self.breakdown_type, - breakdown_field=str(self.breakdown_field), - group_type_index=self.group_type_index, - ) - ), - ) - - if not self.histogram_bin_count: - if self.normalize_url: - select_field.expr = parse_expr( - "empty(trimRight({node}, '/?#')) ? '/' : trimRight({node}, '/?#')", - placeholders={"node": select_field.expr}, - ) - - select_field.expr = ast.Call(name="toString", args=[select_field.expr]) - - if self.chart_display_type == ChartDisplayType.WORLD_MAP: - breakdown_limit = BREAKDOWN_VALUES_LIMIT_FOR_COUNTRIES - else: - breakdown_limit = int(self.breakdown_limit) - - aggregation_expression: ast.Expr - if self._aggregation_operation.aggregating_on_session_duration(): - aggregation_expression = ast.Call(name="max", args=[ast.Field(chain=["session", "$session_duration"])]) - elif self.series.math == "dau": - # When aggregating by (daily) unique users, run the breakdown aggregation on count(e.uuid). - # This retains legacy compatibility and should be removed once we have the new trends in production. - aggregation_expression = parse_expr("count({id_field})", placeholders={"id_field": self._id_field}) - else: - aggregation_expression = self._aggregation_operation.select_aggregation() - # Take a shortcut with WAU and MAU queries. Get the total AU-s for the period instead. - if "replaced" in find_placeholders(aggregation_expression): - actor = "e.distinct_id" if self.team.aggregate_users_by_distinct_id else "e.person_id" - replaced = parse_expr(f"count(DISTINCT {actor})") - aggregation_expression = replace_placeholders(aggregation_expression, {"replaced": replaced}) - - timestamp_field = self.series.timestamp_field if hasattr(self.series, "timestamp_field") else "timestamp" - date_filter = ast.And( - exprs=[ - parse_expr( - "{timestamp} >= {date_from_with_adjusted_start_of_interval}", - placeholders={ - **self.query_date_range.to_placeholders(), - "timestamp": ast.Field(chain=[timestamp_field]), - }, - ), - parse_expr( - "{timestamp} <= {date_to}", - placeholders={ - **self.query_date_range.to_placeholders(), - "timestamp": ast.Field(chain=[timestamp_field]), - }, - ), - ] - ) - - inner_events_query = parse_select( - """ - SELECT - {select_field}, - {aggregation_expression} as count - FROM {table} e - WHERE - {date_filter} and {events_where} - GROUP BY - value - ORDER BY - count DESC, - value DESC - LIMIT {breakdown_limit_plus_one} - """, - placeholders={ - "select_field": select_field, - "aggregation_expression": aggregation_expression, - "table": self._table, - "date_filter": date_filter, - "events_where": self.events_filter, - "breakdown_limit_plus_one": ast.Constant(value=breakdown_limit + 1), - }, - ) - - # Reverse the order if looking at the smallest values - if self.series.math_property is not None and self.series.math == "min": - if ( - isinstance(inner_events_query, ast.SelectQuery) - and inner_events_query.order_by is not None - and isinstance(inner_events_query.order_by[0], ast.OrderExpr) - ): - inner_events_query.order_by[0].order = "ASC" - - values: list[Any] - if self.histogram_bin_count is not None: - query = parse_select( - """ - SELECT {expr} FROM ({inner_events_query}) - """, - placeholders={ - "inner_events_query": inner_events_query, - "expr": self._to_bucketing_expression(), - }, - ) - response = execute_hogql_query( - query_type="TrendsQueryBreakdownValues", - query=query, - team=self.team, - modifiers=self.modifiers, - limit_context=self.limit_context, - ) - if response.results and len(response.results) > 0: - values = response.results[0][0] - else: - values = [] - else: - # We're not running this through groupArray, as that eats NULL values. - query = inner_events_query - response = execute_hogql_query( - query_type="TrendsQueryBreakdownValues", - query=query, - team=self.team, - modifiers=self.modifiers, - limit_context=self.limit_context, - ) - value_index = (response.columns or []).index("value") - values = [row[value_index] for row in response.results or []] - - needs_other = False - if len(values) == breakdown_limit + 1: - needs_other = True - values = values[:-1] - - # Add "other" value if "other" is not hidden and we're not bucketing numeric values - if self.hide_other_aggregation is not True and self.histogram_bin_count is None: - values = [BREAKDOWN_NULL_STRING_LABEL if value in (None, "") else value for value in values] - if needs_other: - values = [BREAKDOWN_OTHER_STRING_LABEL, *values] - - if len(values) == 0: - values.insert(0, None) - return values - - return values - - def _to_bucketing_expression(self) -> ast.Expr: - assert isinstance(self.histogram_bin_count, int) - - if self.histogram_bin_count <= 1: - qunatile_expression = "quantiles(0,1)(value)" - else: - quantiles = [] - bin_size = 1.0 / self.histogram_bin_count - for i in range(self.histogram_bin_count + 1): - quantiles.append(i * bin_size) - - qunatile_expression = f"quantiles({','.join([f'{quantile:.2f}' for quantile in quantiles])})(value)" - - return parse_expr(f"arrayCompact(arrayMap(x -> floor(x, 2), {qunatile_expression}))") - - @cached_property - def _id_field(self) -> ast.Field: - if isinstance(self.series, DataWarehouseNode): - return ast.Field(chain=["e", self.series.id_field]) - - return ast.Field(chain=["e", "uuid"]) - - @cached_property - def _table(self) -> ast.Field: - if isinstance(self.series, DataWarehouseNode): - return ast.Field(chain=[self.series.table_name]) - - return ast.Field(chain=["events"]) - - @cached_property - def _aggregation_operation(self) -> AggregationOperations: - return AggregationOperations( - self.team, - self.series, - self.chart_display_type, - self.query_date_range, - is_total_value=True, # doesn't matter in this case - ) diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index 80d7c6c77345e..f7cca83fa28a4 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -184,61 +184,42 @@ # --- # name: TestTrends.test_breakdown_by_group_props_person_on_events ''' - SELECT toString(e__group_0.properties___industry) AS value, - count(e.uuid) AS count - FROM events AS e - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- -# name: TestTrends.test_breakdown_by_group_props_person_on_events.1 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number FROM - (SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(e__group_0.properties___industry), ''), '$$_posthog_breakdown_null_$$'), ['finance', 'technology'], ['finance', 'technology'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(e__group_0.properties___industry), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY day_start, + breakdown_value) GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, @@ -248,7 +229,7 @@ max_query_size=524288 ''' # --- -# name: TestTrends.test_breakdown_by_group_props_person_on_events.2 +# name: TestTrends.test_breakdown_by_group_props_person_on_events.1 ''' SELECT persons.id AS id, toTimeZone(persons.created_at, 'UTC') AS created_at, @@ -273,7 +254,7 @@ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-02 00:00:00.000000', 6, 'UTC')), less(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-03 00:00:00.000000', 6, 'UTC')), ifNull(equals(toString(e__group_0.properties___industry), 'technology'), 0))) + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-02 00:00:00.000000', 6, 'UTC')), less(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-03 00:00:00.000000', 6, 'UTC')), ifNull(equals(e__group_0.properties___industry, 'technology'), 0))) GROUP BY actor_id) AS source INNER JOIN (SELECT argMax(person.created_at, person.version) AS created_at, @@ -293,6 +274,20 @@ max_query_size=524288 ''' # --- +# name: TestTrends.test_breakdown_by_group_props_person_on_events.2 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, [''])) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 + ''' +# --- # name: TestTrends.test_breakdown_by_group_props_person_on_events.3 ''' SELECT DISTINCT session_replay_events.session_id AS session_id @@ -309,27 +304,49 @@ # --- # name: TestTrends.test_breakdown_by_group_props_with_person_filter_person_on_events ''' - SELECT toString(e__group_0.properties___industry) AS value, - count(e.uuid) AS count - FROM events AS e - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'key'), ''), 'null'), '^"|"$', ''), 'value'), 0))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(e__group_0.properties___industry), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'key'), ''), 'null'), '^"|"$', ''), 'value'), 0), true) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_breakdown_by_group_props_with_person_filter_person_on_events.1 @@ -375,45 +392,73 @@ # --- # name: TestTrends.test_breakdown_filtering_with_properties_in_new_format ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0), true) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_breakdown_filtering_with_properties_in_new_format.1 ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number FROM - (SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['second url'], ['second url'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0), true) + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0), true) + GROUP BY day_start, + breakdown_value) GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, @@ -475,26 +520,44 @@ # --- # name: TestTrends.test_breakdown_weekly_active_users_aggregated ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')) AS value, - count(DISTINCT e__pdi.person_id) AS count - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))), equals(e.event, '$pageview')) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT count(DISTINCT actor_id) AS total, + breakdown_value AS breakdown_value + FROM + (SELECT d.timestamp AS timestamp, + e.actor_id AS actor_id, + e.breakdown_value AS breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), true), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id, + breakdown_value) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp, + e.actor_id, + e.breakdown_value + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + GROUP BY breakdown_value + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_breakdown_weekly_active_users_aggregated.1 @@ -541,26 +604,44 @@ # --- # name: TestTrends.test_breakdown_weekly_active_users_aggregated_materialized ''' - SELECT toString(nullIf(nullIf(e.mat_key, ''), 'null')) AS value, - count(DISTINCT e__pdi.person_id) AS count - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))), equals(e.event, '$pageview')) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT count(DISTINCT actor_id) AS total, + breakdown_value AS breakdown_value + FROM + (SELECT d.timestamp AS timestamp, + e.actor_id AS actor_id, + e.breakdown_value AS breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id, + ifNull(nullIf(toString(nullIf(nullIf(e.mat_key, ''), 'null')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), true), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id, + breakdown_value) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp, + e.actor_id, + e.breakdown_value + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + GROUP BY breakdown_value + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_breakdown_weekly_active_users_aggregated_materialized.1 @@ -627,40 +708,78 @@ # --- # name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.2 ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')) AS value, - count(DISTINCT e__pdi.person_id) AS count - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT counts AS total, + toStartOfDay(timestamp) AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT d.timestamp AS timestamp, + count(DISTINCT e.actor_id) AS counts, + e.breakdown_value AS breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))), true), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id, + breakdown_value) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp, + e.breakdown_value + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0))) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.3 @@ -735,27 +854,49 @@ # --- # name: TestTrends.test_breakdown_with_filter_groups_person_on_events ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), true) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_breakdown_with_filter_groups_person_on_events.1 @@ -810,27 +951,56 @@ # --- # name: TestTrends.test_breakdown_with_filter_groups_person_on_events_v2.1 ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 2) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), true) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_breakdown_with_filter_groups_person_on_events_v2.2 @@ -883,52 +1053,87 @@ # --- # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1.0 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.1 ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number FROM - (SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['other_value', '$$_posthog_breakdown_null_$$', 'value'], ['other_value', '$$_posthog_breakdown_null_$$', 'value'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1.0 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1.0 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY day_start, + breakdown_value) GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, @@ -1290,38 +1495,76 @@ # --- # name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter ''' - SELECT toString(e__pdi__person.`properties___$some_prop`) AS value, - count(DISTINCT e__pdi.person_id) AS count - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop`, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'filter_prop'), ''), 'null'), '^"|"$', '') AS properties___filter_prop - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(equals(e.event, 'sign up'), ifNull(equals(e__pdi__person.properties___filter_prop, 'filter_val'), 0))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT counts AS total, + toStartOfDay(timestamp) AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT d.timestamp AS timestamp, + count(DISTINCT e.actor_id) AS counts, + e.breakdown_value AS breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(30)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id, + ifNull(nullIf(toString(e__pdi__person.`properties___$some_prop`), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop`, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'filter_prop'), ''), 'null'), '^"|"$', '') AS properties___filter_prop + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(e__pdi__person.properties___filter_prop, 'filter_val'), 0), true), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id, + breakdown_value) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(29))), 0)) + GROUP BY d.timestamp, + e.breakdown_value + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0))) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter.1 @@ -1394,26 +1637,64 @@ # --- # name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter_poe_v2 ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', '')) AS value, - count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS count - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'filter_prop'), ''), 'null'), '^"|"$', ''), 'filter_val'), 0))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT counts AS total, + toStartOfDay(timestamp) AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT d.timestamp AS timestamp, + count(DISTINCT e.actor_id) AS counts, + e.breakdown_value AS breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(30)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS actor_id, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 2) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'filter_prop'), ''), 'null'), '^"|"$', ''), 'filter_val'), 0), true), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id, + breakdown_value) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(29))), 0)) + GROUP BY d.timestamp, + e.breakdown_value + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0))) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter_poe_v2.1 @@ -1520,29 +1801,51 @@ # --- # name: TestTrends.test_person_filtering_in_cohort_in_action.2 ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)), true) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_person_filtering_in_cohort_in_action.3 @@ -1610,29 +1913,51 @@ # --- # name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.2 ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 2) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)), true) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.3 @@ -2233,19 +2558,48 @@ # --- # name: TestTrends.test_timezones_daily.4 ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_timezones_daily.5 @@ -2421,19 +2775,48 @@ # --- # name: TestTrends.test_timezones_daily_minus_utc.4 ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'), true) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_timezones_daily_minus_utc.5 @@ -2609,19 +2992,48 @@ # --- # name: TestTrends.test_timezones_daily_plus_utc.4 ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(DISTINCT e__pdi.person_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'), true) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_timezones_daily_plus_utc.5 @@ -2999,85 +3411,113 @@ # --- # name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns ''' - SELECT toString(e__pdi__person.properties___email) AS value, - count(e.uuid) AS count - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), 1), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0))))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- -# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.1 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number FROM - (SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(e__pdi__person.properties___email), ''), '$$_posthog_breakdown_null_$$'), ['test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], ['test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), 1), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0))), true) + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(e__pdi__person.properties___email), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), 1), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0))), true) + GROUP BY day_start, + breakdown_value) GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 + ''' +# --- +# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.1 + ''' + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(e.uuid) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(e__pdi__person.properties___email), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), 0)), true) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, @@ -3249,81 +3689,52 @@ # --- # name: TestTrends.test_trends_aggregate_by_distinct_id.2 ''' - SELECT toString(e__pdi__person.`properties___$some_prop`) AS value, - count(e.uuid) AS count - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- -# name: TestTrends.test_trends_aggregate_by_distinct_id.3 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number FROM - (SELECT count(DISTINCT e.distinct_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(e__pdi__person.`properties___$some_prop`), ''), '$$_posthog_breakdown_null_$$'), ['some_val', '$$_posthog_breakdown_null_$$'], ['some_val', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(DISTINCT e.distinct_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(e__pdi__person.`properties___$some_prop`), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY day_start, + breakdown_value) GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, @@ -3333,7 +3744,7 @@ max_query_size=524288 ''' # --- -# name: TestTrends.test_trends_aggregate_by_distinct_id.4 +# name: TestTrends.test_trends_aggregate_by_distinct_id.3 ''' SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -3372,7 +3783,7 @@ max_query_size=524288 ''' # --- -# name: TestTrends.test_trends_aggregate_by_distinct_id.5 +# name: TestTrends.test_trends_aggregate_by_distinct_id.4 ''' SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -3411,6 +3822,45 @@ max_query_size=524288 ''' # --- +# name: TestTrends.test_trends_aggregate_by_distinct_id.5 + ''' + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(DISTINCT e.distinct_id) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 + ''' +# --- # name: TestTrends.test_trends_aggregate_by_distinct_id.6 ''' SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', '')) AS value, @@ -3515,19 +3965,54 @@ # --- # name: TestTrends.test_trends_breakdown_cumulative ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT day_start AS day_start, + sum(count) OVER (PARTITION BY breakdown_value + ORDER BY day_start ASC) AS count, + breakdown_value AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(DISTINCT e__pdi.person_id) AS total, + min(toStartOfDay(toTimeZone(e.timestamp, 'UTC'))) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY e__pdi.person_id, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + ORDER BY day_start ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_trends_breakdown_cumulative.1 @@ -3578,19 +4063,54 @@ # --- # name: TestTrends.test_trends_breakdown_cumulative_poe_v2 ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT day_start AS day_start, + sum(count) OVER (PARTITION BY breakdown_value + ORDER BY day_start ASC) AS count, + breakdown_value AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, + min(toStartOfDay(toTimeZone(e.timestamp, 'UTC'))) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 2) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + ORDER BY day_start ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_trends_breakdown_cumulative_poe_v2.1 @@ -3628,8 +4148,62 @@ ORDER BY day_start ASC, breakdown_value ASC) ORDER BY day_start ASC) GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), + arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 + ''' +# --- +# name: TestTrends.test_trends_breakdown_normalize_url + ''' + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT day_start AS day_start, + sum(count) OVER (PARTITION BY breakdown_value + ORDER BY day_start ASC) AS count, + breakdown_value AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, + min(toStartOfDay(toTimeZone(e.timestamp, 'UTC'))) AS day_start, + ifNull(nullIf(toString(if(empty(trim(TRAILING '/?#' + FROM replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''))), '/', trim(TRAILING '/?#' + FROM replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')))), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 2) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + ORDER BY day_start ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, @@ -3639,25 +4213,6 @@ max_query_size=524288 ''' # --- -# name: TestTrends.test_trends_breakdown_normalize_url - ''' - SELECT toString(if(empty(trim(TRAILING '/?#' - FROM replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''))), '/', trim(TRAILING '/?#' - FROM replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')))) AS value, - count(e.uuid) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_trends_breakdown_normalize_url.1 ''' SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -3708,26 +4263,31 @@ # --- # name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value, - max(e__session.`$session_duration`) AS count - FROM events AS e - LEFT JOIN - (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, - sessions.session_id AS session_id - FROM sessions - WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY sessions.session_id, - sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT quantile(0.5)(session_duration) AS total, + breakdown_value AS breakdown_value + FROM + (SELECT any(e__session.`$session_duration`) AS session_duration, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, + sessions.session_id AS session_id + FROM sessions + WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY sessions.session_id, + sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY e.`$session_id`, + breakdown_value + ORDER BY 1 DESC, breakdown_value DESC) + GROUP BY breakdown_value + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown.1 @@ -3736,18 +4296,19 @@ breakdown_value AS breakdown_value FROM (SELECT any(e__session.`$session_duration`) AS session_duration, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['value2', 'value1', '$$_posthog_breakdown_null_$$'], ['value2', 'value1', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value FROM events AS e SAMPLE 1 LEFT JOIN (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, sessions.session_id AS session_id FROM sessions - WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) GROUP BY sessions.session_id, sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) GROUP BY e.`$session_id`, - breakdown_value) + breakdown_value + ORDER BY 1 DESC, breakdown_value DESC) GROUP BY breakdown_value LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, @@ -3942,19 +4503,33 @@ # --- # name: TestTrends.test_trends_count_per_user_average_aggregated_with_event_property_breakdown_with_sampling ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))), equals(e.event, 'viewed video')) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT total AS total, + breakdown_value AS breakdown_value + FROM + (SELECT avg(total) AS total, + breakdown_value AS breakdown_value + FROM + (SELECT count(e.uuid) AS total, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1.0 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(equals(e.event, 'viewed video'), true), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) + GROUP BY e__pdi.person_id, + breakdown_value) + GROUP BY breakdown_value) + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_trends_count_per_user_average_aggregated_with_event_property_breakdown_with_sampling.1 @@ -4198,44 +4773,49 @@ # --- # name: TestTrends.test_trends_person_breakdown_with_session_property_single_aggregate_math_and_breakdown ''' - SELECT toString(e__pdi__person.`properties___$some_prop`) AS value, - max(e__session.`$session_duration`) AS count - FROM events AS e - LEFT JOIN - (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, - sessions.session_id AS session_id - FROM sessions - WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY sessions.session_id, - sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT quantile(0.5)(session_duration) AS total, + breakdown_value AS breakdown_value + FROM + (SELECT any(e__session.`$session_duration`) AS session_duration, + ifNull(nullIf(toString(e__pdi__person.`properties___$some_prop`), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, + sessions.session_id AS session_id + FROM sessions + WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY sessions.session_id, + sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY e.`$session_id`, + breakdown_value + ORDER BY 1 DESC, breakdown_value DESC) + GROUP BY breakdown_value + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_trends_person_breakdown_with_session_property_single_aggregate_math_and_breakdown.1 @@ -4324,7 +4904,8 @@ GROUP BY sessions.session_id, sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY e.`$session_id`) + GROUP BY e.`$session_id` + ORDER BY 1 DESC) LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, @@ -4348,7 +4929,8 @@ GROUP BY sessions.session_id, sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY e.`$session_id`) + GROUP BY e.`$session_id` + ORDER BY 1 DESC) LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, @@ -4438,67 +5020,103 @@ # --- # name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value, - max(e__session.`$session_duration`) AS count - FROM events AS e - LEFT JOIN - (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, - sessions.session_id AS session_id - FROM sessions - WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY sessions.session_id, - sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0), toIntervalWeek(number)), range(0, plus(coalesce(dateDiff('week', toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')), 0))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT quantile(0.5)(session_duration) AS total, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT any(e__session.`$session_duration`) AS session_duration, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value, + toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 0) AS day_start + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, + sessions.session_id AS session_id + FROM sessions + WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY sessions.session_id, + sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY day_start, + e.`$session_id`, + breakdown_value, + day_start) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns.1 ''' - SELECT arrayMap(number -> plus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0), toIntervalWeek(number)), range(0, plus(coalesce(dateDiff('week', toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')), 0))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number FROM - (SELECT quantile(0.5)(session_duration) AS total, + (SELECT sum(total) AS count, day_start AS day_start, breakdown_value AS breakdown_value FROM - (SELECT any(e__session.`$session_duration`) AS session_duration, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['value2', 'value1'], ['value2', 'value1'], '$$_posthog_breakdown_other_$$') AS breakdown_value, - toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 0) AS day_start - FROM events AS e SAMPLE 1 - LEFT JOIN - (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, - sessions.session_id AS session_id - FROM sessions - WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY sessions.session_id, - sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + (SELECT quantile(0.5)(session_duration) AS total, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT any(e__session.`$session_duration`) AS session_duration, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, + sessions.session_id AS session_id + FROM sessions + WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY sessions.session_id, + sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY day_start, + e.`$session_id`, + breakdown_value, + day_start) GROUP BY day_start, - e.`$session_id`, - breakdown_value, - day_start) + breakdown_value) GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr index 916b036f00e17..5b65891935f0c 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr @@ -1,19 +1,41 @@ # serializer version: 1 # name: TestTrendsDataWarehouseQuery.test_trends_breakdown ''' - SELECT toString(e.prop_1) AS value, - count(e.id) AS count - FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e - WHERE and(and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)), and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(e.id) AS total, + toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start, + ifNull(nullIf(toString(e.prop_1), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e + WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), true) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrendsDataWarehouseQuery.test_trends_breakdown.1 @@ -51,19 +73,41 @@ # --- # name: TestTrendsDataWarehouseQuery.test_trends_breakdown_with_property ''' - SELECT toString(e.prop_1) AS value, - count(e.id) AS count - FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e - WHERE and(and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)), and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 + SELECT groupArray(1)(date)[1] AS date, + arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(e.id) AS total, + toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start, + ifNull(nullIf(toString(e.prop_1), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e + WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'), true) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=1000000, + max_expanded_ast_elements=1000000, + max_query_size=524288 ''' # --- # name: TestTrendsDataWarehouseQuery.test_trends_breakdown_with_property.1 diff --git a/posthog/hogql_queries/insights/trends/test/test_trends.py b/posthog/hogql_queries/insights/trends/test/test_trends.py index 26a1b7f1a6d4f..c3838fa8fd63c 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends.py @@ -1117,11 +1117,7 @@ def test_unique_session_with_session_breakdown(self): self.assertEqual( [(item["breakdown_value"], item["count"], item["data"]) for item in response], - [ - ("[4.95,10.05]", 2.0, [2, 0, 0, 0]), - ("[0.0,4.95]", 1.0, [1, 0, 0, 0]), - ("[10.05,15.01]", 1.0, [0, 1, 0, 0]), - ], + [("[10,15.01]", 2.0, [1, 1, 0, 0]), ("[0,5]", 1.0, [1, 0, 0, 0]), ("[5,10]", 1.0, [1, 0, 0, 0])], ) @also_test_with_person_on_events_v2 @@ -6503,9 +6499,9 @@ def test_breakdown_filtering_bar_chart_by_value(self): self.team, ) - self.assertEqual(response[0]["aggregated_value"], 1) + self.assertEqual(response[0]["aggregated_value"], 2) self.assertEqual(response[1]["aggregated_value"], 1) - self.assertEqual(response[2]["aggregated_value"], 2) # the events without breakdown value + self.assertEqual(response[2]["aggregated_value"], 1) # the events without breakdown value self.assertEqual(response[0]["days"], []) @also_test_with_materialized_columns(person_properties=["key", "key_2"], verify_no_jsonextract=False) diff --git a/posthog/hogql_queries/insights/trends/test/test_trends_persons.py b/posthog/hogql_queries/insights/trends/test/test_trends_persons.py index 1751c7482dd40..ff1991ff537d8 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends_persons.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends_persons.py @@ -343,7 +343,6 @@ def test_trends_breakdown_hogql_persons(self): ) result = self._get_actors(trends_query=source_query, day="2023-05-01", breakdown=20) - self.assertEqual(len(result), 1) self.assertEqual(get_distinct_id(result[0]), "person1") self.assertEqual(get_event_count(result[0]), 1) diff --git a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py index c87d55643d771..aeadb90d1b199 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py @@ -9,8 +9,7 @@ from posthog.hogql import ast from posthog.hogql.constants import MAX_SELECT_RETURNED_ROWS, LimitContext from posthog.hogql.modifiers import create_default_modifiers_for_team -from posthog.hogql_queries.insights.trends.breakdown_values import BREAKDOWN_OTHER_DISPLAY -from posthog.hogql_queries.insights.trends.trends_query_runner import TrendsQueryRunner +from posthog.hogql_queries.insights.trends.trends_query_runner import TrendsQueryRunner, BREAKDOWN_OTHER_DISPLAY from posthog.models.cohort.cohort import Cohort from posthog.models.property_definition import PropertyDefinition @@ -811,7 +810,7 @@ def test_breakdown_is_context_aware(self, mock_sync_execute: MagicMock): limit_context=LimitContext.QUERY_ASYNC, ) - self.assertEqual(mock_sync_execute.call_count, 4) + self.assertEqual(mock_sync_execute.call_count, 2) for mock_execute_call_args in mock_sync_execute.call_args_list: self.assertIn(f" max_execution_time={HOGQL_INCREASED_MAX_EXECUTION_TIME},", mock_execute_call_args[0][0]) @@ -981,11 +980,11 @@ def test_trends_breakdowns_histogram(self): breakdown_labels = [result["breakdown_value"] for result in response.results] assert len(response.results) == 4 - assert breakdown_labels == ["[10.0,17.5]", "[17.5,25.0]", "[25.0,32.5]", "[32.5,40.01]"] + assert breakdown_labels == ["[10,17.5]", "[17.5,25]", "[25,32.5]", "[32.5,40.01]"] - assert response.results[0]["label"] == "[10.0,17.5]" - assert response.results[1]["label"] == "[17.5,25.0]" - assert response.results[2]["label"] == "[25.0,32.5]" + assert response.results[0]["label"] == "[10,17.5]" + assert response.results[1]["label"] == "[17.5,25]" + assert response.results[2]["label"] == "[25,32.5]" assert response.results[3]["label"] == "[32.5,40.01]" assert response.results[0]["data"] == [0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0] @@ -1992,10 +1991,10 @@ def test_to_actors_query_options_breakdowns(self): assert response.day is not None assert response.series == [InsightActorsQuerySeries(label="$pageview", value=0)] assert response.breakdown == [ - BreakdownItem(label=BREAKDOWN_OTHER_DISPLAY, value="$$_posthog_breakdown_other_$$"), BreakdownItem(label="Chrome", value="Chrome"), BreakdownItem(label="Firefox", value="Firefox"), - BreakdownItem(label="Safari", value="Safari"), + BreakdownItem(label="Edge", value="Edge"), + BreakdownItem(label=BREAKDOWN_OTHER_DISPLAY, value="$$_posthog_breakdown_other_$$"), ] def test_to_actors_query_options_breakdowns_boolean(self): @@ -2042,9 +2041,9 @@ def test_to_actors_query_options_breakdowns_histogram(self): assert response.series == [InsightActorsQuerySeries(label="$pageview", value=0)] assert response.breakdown == [ - BreakdownItem(label="[10.0,17.5]", value="[10.0,17.5]"), - BreakdownItem(label="[17.5,25.0]", value="[17.5,25.0]"), - BreakdownItem(label="[25.0,32.5]", value="[25.0,32.5]"), + BreakdownItem(label="[10,17.5]", value="[10,17.5]"), + BreakdownItem(label="[17.5,25]", value="[17.5,25]"), + BreakdownItem(label="[25,32.5]", value="[25,32.5]"), BreakdownItem(label="[32.5,40.01]", value="[32.5,40.01]"), BreakdownItem(label='["",""]', value='["",""]'), ] @@ -2105,8 +2104,8 @@ def test_to_actors_query_options_breakdowns_hogql(self): assert response.breakdown == [ BreakdownItem(label="Chrome", value="Chrome"), BreakdownItem(label="Firefox", value="Firefox"), - BreakdownItem(label="Safari", value="Safari"), BreakdownItem(label="Edge", value="Edge"), + BreakdownItem(label="Safari", value="Safari"), ] def test_to_actors_query_options_bar_value(self): diff --git a/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py b/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py index 2d404f0a8c092..bf55a3eb82489 100644 --- a/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py +++ b/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py @@ -384,12 +384,11 @@ def _breakdown_where_expr(self) -> list[ast.Expr]: timings=self.timings, modifiers=self.modifiers, events_filter=self._events_where_expr(with_breakdown_expr=False), - breakdown_values_override=[self.breakdown_value] if self.breakdown_value is not None else None, limit_context=self.limit_context, ) if breakdown.enabled and not breakdown.is_histogram_breakdown: - breakdown_filter = breakdown.events_where_filter() + breakdown_filter = breakdown.events_where_filter(breakdown_values_override=self.breakdown_value) if breakdown_filter is not None: conditions.append(breakdown_filter) diff --git a/posthog/hogql_queries/insights/trends/trends_query_builder.py b/posthog/hogql_queries/insights/trends/trends_query_builder.py index 015e269e5628e..75418ffba2a3e 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_builder.py +++ b/posthog/hogql_queries/insights/trends/trends_query_builder.py @@ -8,13 +8,13 @@ from posthog.hogql_queries.insights.trends.aggregation_operations import ( AggregationOperations, ) -from posthog.hogql_queries.insights.trends.breakdown import Breakdown -from posthog.hogql_queries.insights.trends.breakdown_values import BREAKDOWN_OTHER_STRING_LABEL +from posthog.hogql_queries.insights.trends.breakdown import Breakdown, BREAKDOWN_OTHER_STRING_LABEL from posthog.hogql_queries.insights.trends.display import TrendsDisplay from posthog.hogql_queries.insights.trends.utils import series_event_name from posthog.hogql_queries.utils.query_date_range import QueryDateRange from posthog.models.action.action import Action from posthog.models.filters.mixins.utils import cached_property +from posthog.hogql.constants import get_breakdown_limit_for_context from posthog.models.team.team import Team from posthog.queries.trends.breakdown import BREAKDOWN_NULL_STRING_LABEL from posthog.schema import ( @@ -98,14 +98,12 @@ def _get_events_subquery( no_modifications: Optional[bool], is_actors_query: bool, breakdown: Breakdown, - breakdown_values_override: Optional[str | int] = None, actors_query_time_frame: Optional[str] = None, ) -> ast.SelectQuery: events_filter = self._events_filter( ignore_breakdowns=False, breakdown=breakdown, is_actors_query=is_actors_query, - breakdown_values_override=breakdown_values_override, actors_query_time_frame=actors_query_time_frame, ) @@ -132,7 +130,13 @@ def _get_events_subquery( ), ) - if not self._trends_display.is_total_value(): # TODO: remove: and not is_actors_query + # If it's total value, we should order the results as there's no outer query to do the ordering + if self._trends_display.is_total_value(): + default_query.order_by = [ast.OrderExpr(expr=parse_expr("1"), order="DESC")] + if breakdown.enabled: + default_query.order_by.append(ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="DESC")) + + else: # For cumulative unique users or groups, we want to count each user or group once per query, not per day if ( self.query.trendsFilter @@ -197,12 +201,15 @@ def _get_events_subquery( wrapper.group_by.append(ast.Field(chain=["day_start"])) wrapper.select.append(ast.Field(chain=["breakdown_value"])) - wrapper.group_by.append(ast.Field(chain=["breakdown_value"])) + if not breakdown.is_histogram_breakdown: + wrapper.group_by.append(ast.Field(chain=["breakdown_value"])) return wrapper + # Just breakdowns elif breakdown.enabled: breakdown_expr = breakdown.column_expr() + default_query.select.append(breakdown_expr) default_query.group_by.append(ast.Field(chain=["breakdown_value"])) # Just session duration math property @@ -236,7 +243,9 @@ def _get_events_subquery( return default_query - def _outer_select_query(self, breakdown: Breakdown, inner_query: ast.SelectQuery) -> ast.SelectQuery: + def _outer_select_query( + self, breakdown: Breakdown, inner_query: ast.SelectQuery + ) -> ast.SelectQuery | ast.SelectUnionQuery: total_array = parse_expr( """ arrayMap( @@ -322,24 +331,47 @@ def _outer_select_query(self, breakdown: Breakdown, inner_query: ast.SelectQuery ), ) ) + query.select.append(ast.Alias(alias="row_number", expr=parse_expr("rowNumberInAllBlocks()"))) query.group_by = [ast.Field(chain=["breakdown_value"])] - query.order_by.insert( - 0, - cast( - ast.OrderExpr, - parse_expr( - "breakdown_value = {other} ? 2 : breakdown_value = {nil} ? 1 : 0", - placeholders={ - "other": ast.Constant(value=BREAKDOWN_OTHER_STRING_LABEL), - "nil": ast.Constant(value=BREAKDOWN_NULL_STRING_LABEL), - }, - ), - ), - ) + query.order_by.append(ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="ASC")) + # TODO: What happens with cohorts and this limit? + if not breakdown.is_histogram_breakdown: + return parse_select( + """ + SELECT + groupArray(1)(date)[1] as date, + arrayMap( + i -> + arraySum(arrayMap( + x -> arrayElement(x, i), + groupArray(total) + )), + arrayEnumerate(date) + ) as total, + if(row_number >= {breakdown_limit}, {other}, breakdown_value) as breakdown_value + FROM {outer_query} + GROUP BY breakdown_value + ORDER BY + breakdown_value = {other} ? 2 : breakdown_value = {nil} ? 1 : 0, + arraySum(total) DESC, + breakdown_value ASC + """, + { + "outer_query": query, + "breakdown_limit": ast.Constant(value=self._get_breakdown_limit()), + "other": ast.Constant(value=BREAKDOWN_OTHER_STRING_LABEL), + "nil": ast.Constant(value=BREAKDOWN_NULL_STRING_LABEL), + }, + ) return query + def _get_breakdown_limit(self) -> int: + return ( + self.query.breakdownFilter and self.query.breakdownFilter.breakdown_limit + ) or get_breakdown_limit_for_context(self.limit_context) + def _inner_select_query( self, breakdown: Breakdown, inner_query: ast.SelectQuery | ast.SelectUnionQuery ) -> ast.SelectQuery: @@ -364,7 +396,47 @@ def _inner_select_query( query.order_by.append(ast.OrderExpr(expr=ast.Field(chain=["day_start"]), order="ASC")) if breakdown.enabled: - query.select.append(ast.Field(chain=["breakdown_value"])) + if breakdown.is_histogram_breakdown: + histogram_bin_count = ( + self.query.breakdownFilter.breakdown_histogram_bin_count if self.query.breakdownFilter else None + ) + query.ctes = { + "min_max": ast.CTE( + name="min_max", + expr=self._get_events_subquery( + no_modifications=False, is_actors_query=False, breakdown=breakdown + ), + cte_type="subquery", + ) + } + query.select.extend( + [ + # Using arrays would be more efficient here, _but_ only if there's low cardinality in breakdown_values + # If cardinality is high it'd blow up memory + # Clickhouse is reasonably clever not rereading the same data + parse_expr("(select max(breakdown_value) from min_max) as max_num"), + parse_expr("(select min(breakdown_value) from min_max) as min_num"), + parse_expr("max_num - min_num as diff"), + parse_expr(f"{histogram_bin_count} as bins"), + parse_expr(""" + arrayMap( + x -> [ + ((diff / bins) * x) + min_num, + ((diff / bins) * (x + 1)) + min_num + if(x + 1 = bins, 0.01, 0) + ], + range(bins) + ) as buckets + """), + parse_expr("""arrayFilter( + x -> + x[1] <= breakdown_value and breakdown_value < x[2], + buckets + )[1] as breakdown_value + """), + ] + ) + else: + query.select.append(ast.Field(chain=["breakdown_value"])) query.group_by.append(ast.Field(chain=["breakdown_value"])) query.order_by.append(ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="ASC")) @@ -380,7 +452,6 @@ def _events_filter( is_actors_query: bool, breakdown: Breakdown | None, ignore_breakdowns: bool = False, - breakdown_values_override: Optional[str | int] = None, actors_query_time_frame: Optional[str] = None, ) -> ast.Expr: series = self.series @@ -479,7 +550,7 @@ def session_duration_math_property_wrapper(self, default_query: ast.SelectQuery) query.group_by = [] return query - def _breakdown(self, is_actors_query: bool, breakdown_values_override: Optional[str] = None): + def _breakdown(self, is_actors_query: bool): return Breakdown( team=self.team, query=self.query, @@ -491,9 +562,7 @@ def _breakdown(self, is_actors_query: bool, breakdown_values_override: Optional[ breakdown=None, # Passing in None because we know we dont actually need it ignore_breakdowns=True, is_actors_query=is_actors_query, - breakdown_values_override=breakdown_values_override, ), - breakdown_values_override=[breakdown_values_override] if breakdown_values_override is not None else None, limit_context=self.limit_context, ) diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index 50d7db4160255..4717e0a3e4943 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -22,13 +22,13 @@ from posthog.hogql.printer import to_printed_hogql from posthog.hogql.query import execute_hogql_query from posthog.hogql.timings import HogQLTimings -from posthog.hogql_queries.insights.trends.breakdown_values import ( +from posthog.hogql_queries.insights.trends.display import TrendsDisplay +from posthog.hogql_queries.insights.trends.breakdown import ( BREAKDOWN_NULL_DISPLAY, BREAKDOWN_NULL_STRING_LABEL, BREAKDOWN_OTHER_DISPLAY, BREAKDOWN_OTHER_STRING_LABEL, ) -from posthog.hogql_queries.insights.trends.display import TrendsDisplay from posthog.hogql_queries.insights.trends.trends_query_builder import TrendsQueryBuilder from posthog.hogql_queries.insights.trends.trends_actors_query_builder import TrendsActorsQueryBuilder from posthog.hogql_queries.insights.trends.series_with_extras import SeriesWithExtras @@ -215,60 +215,65 @@ def to_actors_query_options(self) -> InsightActorsQueryOptionsResponse: ] # Breakdowns - for series in self.query.series: - # TODO: Add support for DataWarehouseNode - if isinstance(series, DataWarehouseNode): - continue + if self.query.breakdownFilter is not None: + res_breakdown = [] + if self.query.breakdownFilter.breakdown_type == "cohort": + assert isinstance(self.query.breakdownFilter.breakdown, list) + for value in self.query.breakdownFilter.breakdown: + if value != "all" and str(value) != "0": + res_breakdown.append( + BreakdownItem(label=Cohort.objects.get(pk=int(value), team=self.team).name, value=value) + ) + else: + res_breakdown.append(BreakdownItem(label="all users", value="all")) + else: + # TODO: Work out if we will have issues only getting breakdown values for + # the "current" period and not "previous" period for when "compare" is turned on + query_date_range = self.query_date_range - # TODO: Work out if we will have issues only getting breakdown values for - # the "current" period and not "previous" period for when "compare" is turned on - query_date_range = self.query_date_range + query_builder = TrendsQueryBuilder( + trends_query=self.query, + team=self.team, + query_date_range=query_date_range, + series=series, + timings=self.timings, + modifiers=self.modifiers, + limit_context=self.limit_context, + ) - query_builder = TrendsQueryBuilder( - trends_query=self.query, - team=self.team, - query_date_range=query_date_range, - series=series, - timings=self.timings, - modifiers=self.modifiers, - limit_context=self.limit_context, - ) + query = query_builder.build_query() - breakdown = query_builder._breakdown(is_actors_query=False) - if not breakdown.enabled: - break + breakdown = query_builder._breakdown(is_actors_query=False) - is_boolean_breakdown = self._is_breakdown_field_boolean() - is_histogram_breakdown = breakdown.is_histogram_breakdown - breakdown_values: list[str | int] - res_breakdown = [] + results = execute_hogql_query( + query_type="TrendsActorsQueryOptions", + query=query, + team=self.team, + # timings=timings, + # modifiers=modifiers, + ) + breakdown_values = [ + row[results.columns.index("breakdown_value") if results.columns else 2] for row in results.results + ] - if is_histogram_breakdown: - buckets = breakdown._get_breakdown_histogram_buckets() - breakdown_values = [f"[{t[0]},{t[1]}]" for t in buckets] - # TODO: append this only if needed - breakdown_values.append('["",""]') - else: - breakdown_values = breakdown._breakdown_values - - for value in breakdown_values: - if self.query.breakdownFilter is not None and self.query.breakdownFilter.breakdown_type == "cohort": - is_all = value == "all" or str(value) == "0" - label = "all users" if is_all else Cohort.objects.get(pk=value).name - value = "all" if is_all else value - elif value == BREAKDOWN_OTHER_STRING_LABEL: - label = BREAKDOWN_OTHER_DISPLAY - elif value == BREAKDOWN_NULL_STRING_LABEL: - label = BREAKDOWN_NULL_DISPLAY - elif is_boolean_breakdown: - label = self._convert_boolean(value) - else: - label = str(value) + if breakdown.is_histogram_breakdown: + breakdown_values.append('["",""]') + is_boolean_breakdown = self._is_breakdown_field_boolean() + + for value in breakdown_values: + if value == BREAKDOWN_OTHER_STRING_LABEL: + label = BREAKDOWN_OTHER_DISPLAY + elif value == BREAKDOWN_NULL_STRING_LABEL: + label = BREAKDOWN_NULL_DISPLAY + elif is_boolean_breakdown: + label = self._convert_boolean(value) + else: + label = str(value) - item = BreakdownItem(label=label, value=value) + item = BreakdownItem(label=label, value=value) - if item not in res_breakdown: - res_breakdown.append(item) + if item not in res_breakdown: + res_breakdown.append(item) return InsightActorsQueryOptionsResponse( series=res_series, breakdown=res_breakdown, day=res_days, compare=res_compare diff --git a/posthog/queries/trends/breakdown.py b/posthog/queries/trends/breakdown.py index e0bab69fe666d..663ccd42ccff6 100644 --- a/posthog/queries/trends/breakdown.py +++ b/posthog/queries/trends/breakdown.py @@ -18,7 +18,6 @@ PropertyOperatorType, TREND_FILTER_TYPE_EVENTS, ) -from posthog.hogql_queries.insights.trends.breakdown_values import BREAKDOWN_NULL_DISPLAY, BREAKDOWN_OTHER_DISPLAY from posthog.models.action.util import format_action_filter from posthog.models.entity import Entity from posthog.models.event.sql import EVENT_JOIN_PERSON_SQL @@ -82,6 +81,10 @@ from posthog.utils import encode_get_request_params, generate_short_id from posthog.queries.person_on_events_v2_sql import PERSON_DISTINCT_ID_OVERRIDES_JOIN_SQL +BREAKDOWN_OTHER_DISPLAY = "Other (i.e. all remaining values)" +BREAKDOWN_NULL_DISPLAY = "None (i.e. no value)" + + BREAKDOWN_OTHER_STRING_LABEL = "$$_posthog_breakdown_other_$$" BREAKDOWN_OTHER_NUMERIC_LABEL = 9007199254740991 # pow(2, 53) - 1, for JS compatibility BREAKDOWN_NULL_STRING_LABEL = "$$_posthog_breakdown_null_$$" diff --git a/posthog/queries/trends/test/test_paging_breakdowns.py b/posthog/queries/trends/test/test_paging_breakdowns.py index 47ea447005c1a..b7bee93f31dc6 100644 --- a/posthog/queries/trends/test/test_paging_breakdowns.py +++ b/posthog/queries/trends/test/test_paging_breakdowns.py @@ -2,12 +2,13 @@ from freezegun import freeze_time -from posthog.hogql_queries.insights.trends.breakdown_values import BREAKDOWN_OTHER_DISPLAY from posthog.models import Filter from posthog.queries.trends.trends import Trends from posthog.test.base import APIBaseTest from posthog.test.test_journeys import journeys_for +BREAKDOWN_OTHER_DISPLAY = "Other (i.e. all remaining values)" + class TestPagingBreakdowns(APIBaseTest): """