From 7f8c348a2936256ca1bfdb7db44e01d30c1658c2 Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Wed, 19 Jun 2024 12:11:50 -0700 Subject: [PATCH 01/15] where clause --- posthog/hogql/constants.py | 1 + .../schema/util/where_clause_extractor.py | 41 ++++++++++++++++--- posthog/hogql/visitor.py | 15 ++++--- posthog/hogql_queries/actors_query_runner.py | 39 +++++++++++++++--- .../test/test_insight_actors_query_runner.py | 4 ++ requirements.txt | 4 +- 6 files changed, 85 insertions(+), 19 deletions(-) diff --git a/posthog/hogql/constants.py b/posthog/hogql/constants.py index 769d4a250e65e..5d27150cc1a1c 100644 --- a/posthog/hogql/constants.py +++ b/posthog/hogql/constants.py @@ -92,6 +92,7 @@ def get_breakdown_limit_for_context(limit_context: LimitContext) -> int: class HogQLQuerySettings(BaseModel): model_config = ConfigDict(extra="forbid") optimize_aggregation_in_order: Optional[bool] = None + use_query_cache: Optional[bool] = None # Settings applied on top of all HogQL queries. diff --git a/posthog/hogql/database/schema/util/where_clause_extractor.py b/posthog/hogql/database/schema/util/where_clause_extractor.py index 4f0096af9ff53..12976840b2caa 100644 --- a/posthog/hogql/database/schema/util/where_clause_extractor.py +++ b/posthog/hogql/database/schema/util/where_clause_extractor.py @@ -65,11 +65,37 @@ def add_local_tables(self, join_or_table: LazyJoinToAdd | LazyTableToAdd): def get_inner_where(self, select_query: ast.SelectQuery) -> Optional[ast.Expr]: """Return the where clause that should be applied to the inner table. If None is returned, no pre-filtering is possible.""" - if not select_query.where and not select_query.prewhere: - return None - # visit the where clause wheres = [] + + # If CTEs exist for what we're looking for, apply them + from posthog.hogql.database.schema.persons import PersonsTable + + for table in self.tracked_tables: + if isinstance(table, PersonsTable): + if "person_ids" in select_query.type.ctes: + # wheres.append(parse_expr("persons.id IN person_ids")) + wheres.append( + ast.CompareOperation( + op=ast.CompareOperationOp.In, + left=ast.Field(chain=["id"], type=ast.FieldType(name="id", table_type=table)), + right=ast.SelectQuery( + select=[ast.Field(chain=["actor_id"])], + select_from=ast.JoinExpr(table=ast.Field(chain=["source"])), + ), + ) + ) + + """ + right=ast.Field( + chain=["person_ids"], + type=ast.FieldType( + name="person_ids", table_type=select_query.type.ctes["person_ids"].expr + ), + ), + """ + + # visit the where clause if select_query.where: wheres.append(select_query.where) if select_query.prewhere: @@ -80,8 +106,8 @@ def get_inner_where(self, select_query: ast.SelectQuery) -> Optional[ast.Expr]: else: where = self.visit(ast.And(exprs=wheres)) - if isinstance(where, ast.Constant): - return None + # if isinstance(where, ast.Constant): + # return None return clone_expr(where, clear_types=True, clear_locations=True) @@ -190,7 +216,8 @@ def visit_compare_operation(self, node: ast.CompareOperation) -> ast.Expr: def visit_select_query(self, node: ast.SelectQuery) -> ast.Expr: # going too deep, bail - return ast.Constant(value=True) + # return ast.Constant(value=True) + return node def visit_arithmetic_operation(self, node: ast.ArithmeticOperation) -> ast.Expr: # don't even try to handle complex logic @@ -267,6 +294,8 @@ def visit_field(self, node: ast.Field) -> ast.Expr: chain_length = 1 new_field.chain = new_field.chain[-chain_length:] return new_field + if isinstance(node, ast.Field): + return node return ast.Constant(value=self.tombstone_string) def visit_constant(self, node: ast.Constant) -> ast.Expr: diff --git a/posthog/hogql/visitor.py b/posthog/hogql/visitor.py index d03e691b640ec..4c428c311f912 100644 --- a/posthog/hogql/visitor.py +++ b/posthog/hogql/visitor.py @@ -347,12 +347,15 @@ def visit_arithmetic_operation(self, node: ast.ArithmeticOperation): ) def visit_and(self, node: ast.And): - return ast.And( - start=None if self.clear_locations else node.start, - end=None if self.clear_locations else node.end, - type=None if self.clear_types else node.type, - exprs=[self.visit(expr) for expr in node.exprs], - ) + try: + return ast.And( + start=None if self.clear_locations else node.start, + end=None if self.clear_locations else node.end, + type=None if self.clear_types else node.type, + exprs=[self.visit(expr) for expr in node.exprs], + ) + except AttributeError: + pass def visit_or(self, node: ast.Or): return ast.Or( diff --git a/posthog/hogql_queries/actors_query_runner.py b/posthog/hogql_queries/actors_query_runner.py index 422fa5a82d75f..4fe690e98f358 100644 --- a/posthog/hogql_queries/actors_query_runner.py +++ b/posthog/hogql_queries/actors_query_runner.py @@ -2,7 +2,8 @@ from typing import Optional from collections.abc import Sequence, Iterator from posthog.hogql import ast -from posthog.hogql.parser import parse_expr, parse_order_expr +from posthog.hogql.constants import HogQLQuerySettings +from posthog.hogql.parser import parse_expr, parse_order_expr, parse_select from posthog.hogql.property import has_aggregation from posthog.hogql_queries.actor_strategies import ActorStrategy, PersonStrategy, GroupStrategy from posthog.hogql_queries.insights.insight_actors_query_runner import InsightActorsQueryRunner @@ -230,12 +231,40 @@ def to_query(self) -> ast.SelectQuery: order_by = [] with self.timings.measure("select"): - if self.query.source: - join_expr = self.source_table_join() - else: - join_expr = ast.JoinExpr(table=ast.Field(chain=[self.strategy.origin])) + # Insert CTE here + assert self.source_query_runner is not None # For type checking + source_query = self.source_query_runner.to_actors_query() + if source_query.settings is None: + source_query.settings = HogQLQuerySettings() + source_query.settings.use_query_cache = True + + source_id_chain = self.source_id_column(source_query) + source_alias = "source" + + join_expr = ast.JoinExpr( + table=ast.Field(chain=[source_alias]), + next_join=ast.JoinExpr( + table=ast.Field(chain=[self.strategy.origin]), + join_type="INNER JOIN", + constraint=ast.JoinConstraint( + expr=ast.CompareOperation( + op=ast.CompareOperationOp.Eq, + left=ast.Field(chain=[self.strategy.origin, self.strategy.origin_id]), + right=ast.Field(chain=[source_alias, *source_id_chain]), + ), + constraint_type="ON", + ), + ), + ) + + s = parse_select("SELECT actor_id FROM source") + s.select_from.table = source_query stmt = ast.SelectQuery( + ctes={ + source_alias: ast.CTE(name=source_alias, expr=source_query, cte_type="subquery"), + "person_ids": ast.CTE(name="person_ids", expr=s, cte_type="subquery"), + }, select=columns, select_from=join_expr, where=where, diff --git a/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py b/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py index 0a41138197228..eadae733fcfdf 100644 --- a/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py @@ -7,11 +7,13 @@ from posthog.models.group.util import create_group from posthog.models.group_type_mapping import GroupTypeMapping from posthog.models.team import WeekStartDay +from posthog.schema import HogQLQueryModifiers from posthog.test.base import ( APIBaseTest, ClickhouseTestMixin, _create_event, _create_person, + snapshot_clickhouse_queries, ) @@ -76,6 +78,7 @@ def select(self, query: str, placeholders: Optional[dict[str, Any]] = None): query=query, team=self.team, placeholders=placeholders, + modifiers=HogQLQueryModifiers(optimizeJoinedFilters=True), ) def test_insight_persons_lifecycle_query(self): @@ -205,6 +208,7 @@ def test_insight_persons_stickiness_groups_query(self): self.assertEqual([("org1",)], response.results) + @snapshot_clickhouse_queries def test_insight_persons_trends_query(self): self._create_test_events() self.team.timezone = "US/Pacific" diff --git a/requirements.txt b/requirements.txt index c31cd102213b6..14fc67e574239 100644 --- a/requirements.txt +++ b/requirements.txt @@ -321,7 +321,7 @@ kombu==5.3.2 # via # -r requirements.in # celery -lxml==4.9.4 +lxml==5.2.2 # via # python3-saml # toronado @@ -685,7 +685,7 @@ wrapt==1.15.0 # via aiobotocore wsproto==1.1.0 # via trio-websocket -xmlsec==1.3.13 +xmlsec==1.3.14 # via python3-saml yarl==1.7.2 # via aiohttp From 3aac62a9c3b9c57d9a3767da4a6f7b2bb4b1e983 Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Wed, 19 Jun 2024 12:28:27 -0700 Subject: [PATCH 02/15] persons passdown --- .../schema/util/where_clause_extractor.py | 27 +++++++++++-------- posthog/hogql_queries/actors_query_runner.py | 3 ++- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/posthog/hogql/database/schema/util/where_clause_extractor.py b/posthog/hogql/database/schema/util/where_clause_extractor.py index 12976840b2caa..414431e4eb5d8 100644 --- a/posthog/hogql/database/schema/util/where_clause_extractor.py +++ b/posthog/hogql/database/schema/util/where_clause_extractor.py @@ -6,6 +6,7 @@ from posthog.hogql.ast import CompareOperationOp, ArithmeticOperationOp from posthog.hogql.context import HogQLContext from posthog.hogql.database.models import DatabaseField, LazyJoinToAdd, LazyTableToAdd +from posthog.hogql.database.schema.person_distinct_ids import PersonDistinctIdsTable from posthog.hogql.visitor import clone_expr, CloningVisitor, Visitor, TraversingVisitor @@ -80,20 +81,24 @@ def get_inner_where(self, select_query: ast.SelectQuery) -> Optional[ast.Expr]: op=ast.CompareOperationOp.In, left=ast.Field(chain=["id"], type=ast.FieldType(name="id", table_type=table)), right=ast.SelectQuery( - select=[ast.Field(chain=["actor_id"])], - select_from=ast.JoinExpr(table=ast.Field(chain=["source"])), + select=[ast.Field(chain=["person_id"])], + select_from=ast.JoinExpr(table=ast.Field(chain=["person_ids"])), + ), + ) + ) + if isinstance(table, PersonDistinctIdsTable): + if "distinct_ids" in select_query.type.ctes: + # wheres.append(parse_expr("persons.id IN person_ids")) + wheres.append( + ast.CompareOperation( + op=ast.CompareOperationOp.In, + left=ast.Field(chain=["id"], type=ast.FieldType(name="id", table_type=table)), + right=ast.SelectQuery( + select=[ast.Field(chain=["distinct_id"])], + select_from=ast.JoinExpr(table=ast.Field(chain=["distinct_ids"])), ), ) ) - - """ - right=ast.Field( - chain=["person_ids"], - type=ast.FieldType( - name="person_ids", table_type=select_query.type.ctes["person_ids"].expr - ), - ), - """ # visit the where clause if select_query.where: diff --git a/posthog/hogql_queries/actors_query_runner.py b/posthog/hogql_queries/actors_query_runner.py index 4fe690e98f358..044b76ce9d934 100644 --- a/posthog/hogql_queries/actors_query_runner.py +++ b/posthog/hogql_queries/actors_query_runner.py @@ -257,12 +257,13 @@ def to_query(self) -> ast.SelectQuery: ), ) - s = parse_select("SELECT actor_id FROM source") + s = parse_select("SELECT actor_id as person_id FROM source") s.select_from.table = source_query stmt = ast.SelectQuery( ctes={ source_alias: ast.CTE(name=source_alias, expr=source_query, cte_type="subquery"), + # How to get rid of the extra superfluous select "person_ids": ast.CTE(name="person_ids", expr=s, cte_type="subquery"), }, select=columns, From 8cf5efcd504179cddb14035f1a399084667761f1 Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Wed, 19 Jun 2024 13:15:24 -0700 Subject: [PATCH 03/15] working --- .../database/schema/person_distinct_ids.py | 36 +++++++++++++-- posthog/hogql/database/schema/persons.py | 19 ++++++++ .../schema/util/where_clause_extractor.py | 46 +++---------------- posthog/hogql/visitor.py | 15 +++--- .../trends/trends_actors_query_builder.py | 34 +++++++++++++- 5 files changed, 95 insertions(+), 55 deletions(-) diff --git a/posthog/hogql/database/schema/person_distinct_ids.py b/posthog/hogql/database/schema/person_distinct_ids.py index 7d6e8ae56e8b3..16ba0027fff03 100644 --- a/posthog/hogql/database/schema/person_distinct_ids.py +++ b/posthog/hogql/database/schema/person_distinct_ids.py @@ -1,4 +1,5 @@ -from posthog.hogql.ast import SelectQuery +from posthog.hogql import ast +from posthog.hogql.ast import SelectQuery, And from posthog.hogql.context import HogQLContext from posthog.hogql.database.argmax import argmax_select @@ -15,6 +16,7 @@ ) from posthog.hogql.database.schema.persons import join_with_persons_table from posthog.hogql.errors import ResolutionError +from posthog.hogql.visitor import clone_expr PERSON_DISTINCT_IDS_FIELDS = { "team_id": IntegerDatabaseField(name="team_id"), @@ -28,11 +30,13 @@ } -def select_from_person_distinct_ids_table(requested_fields: dict[str, list[str | int]]): +def select_from_person_distinct_ids_table( + requested_fields: dict[str, list[str | int]], context: HogQLContext, node: SelectQuery +): # Always include "person_id", as it's the key we use to make further joins, and it'd be great if it's available if "person_id" not in requested_fields: requested_fields = {**requested_fields, "person_id": ["person_id"]} - return argmax_select( + select = argmax_select( table_name="raw_person_distinct_ids", select_fields=requested_fields, group_fields=["distinct_id"], @@ -40,6 +44,28 @@ def select_from_person_distinct_ids_table(requested_fields: dict[str, list[str | deleted_field="is_deleted", ) + if "person_ids" in node.type.ctes: + comparison = clone_expr( + ast.CompareOperation( + op=ast.CompareOperationOp.In, + left=ast.Field( + chain=["distinct_id"], type=ast.FieldType(name="distinct_id", table_type=PersonDistinctIdsTable) + ), + right=ast.SelectQuery( + select=[ast.Field(chain=["person_id"])], + select_from=ast.JoinExpr(table=ast.Field(chain=["person_ids"])), + ), + ), + clear_types=True, + clear_locations=True, + ) + if select.where: + select.where = And(exprs=[comparison, select.where]) + else: + select.where = comparison + + return select + def join_with_person_distinct_ids_table( join_to_add: LazyJoinToAdd, @@ -50,7 +76,7 @@ def join_with_person_distinct_ids_table( if not join_to_add.fields_accessed: raise ResolutionError("No fields requested from person_distinct_ids") - join_expr = ast.JoinExpr(table=select_from_person_distinct_ids_table(join_to_add.fields_accessed)) + join_expr = ast.JoinExpr(table=select_from_person_distinct_ids_table(join_to_add.fields_accessed, context, node)) join_expr.join_type = "INNER JOIN" join_expr.alias = join_to_add.to_table join_expr.constraint = ast.JoinConstraint( @@ -82,7 +108,7 @@ class PersonDistinctIdsTable(LazyTable): fields: dict[str, FieldOrTable] = PERSON_DISTINCT_IDS_FIELDS def lazy_select(self, table_to_add: LazyTableToAdd, context, node): - return select_from_person_distinct_ids_table(table_to_add.fields_accessed) + return select_from_person_distinct_ids_table(table_to_add.fields_accessed, context, node) def to_printed_clickhouse(self, context): return "person_distinct_id2" diff --git a/posthog/hogql/database/schema/persons.py b/posthog/hogql/database/schema/persons.py index 54cf36645f506..03a3e4a75ed5c 100644 --- a/posthog/hogql/database/schema/persons.py +++ b/posthog/hogql/database/schema/persons.py @@ -21,6 +21,7 @@ from posthog.hogql.database.schema.util.where_clause_extractor import WhereClauseExtractor from posthog.hogql.database.schema.persons_pdi import PersonsPDITable, persons_pdi_join from posthog.hogql.errors import ResolutionError +from posthog.hogql.visitor import clone_expr from posthog.models.organization import Organization from posthog.schema import PersonsArgMaxVersion @@ -98,6 +99,24 @@ def select_from_persons_table(join_or_table: LazyJoinToAdd | LazyTableToAdd, con elif where: select.where = where + if "person_ids" in node.type.ctes: + comparison = clone_expr( + ast.CompareOperation( + op=ast.CompareOperationOp.In, + left=ast.Field(chain=["id"], type=ast.FieldType(name="id", table_type=PersonsTable)), + right=ast.SelectQuery( + select=[ast.Field(chain=["person_id"])], + select_from=ast.JoinExpr(table=ast.Field(chain=["person_ids"])), + ), + ), + clear_types=True, + clear_locations=True, + ) + if select.where: + select.where = And(exprs=[comparison, select.where]) + else: + select.where = comparison + return select diff --git a/posthog/hogql/database/schema/util/where_clause_extractor.py b/posthog/hogql/database/schema/util/where_clause_extractor.py index 414431e4eb5d8..4f0096af9ff53 100644 --- a/posthog/hogql/database/schema/util/where_clause_extractor.py +++ b/posthog/hogql/database/schema/util/where_clause_extractor.py @@ -6,7 +6,6 @@ from posthog.hogql.ast import CompareOperationOp, ArithmeticOperationOp from posthog.hogql.context import HogQLContext from posthog.hogql.database.models import DatabaseField, LazyJoinToAdd, LazyTableToAdd -from posthog.hogql.database.schema.person_distinct_ids import PersonDistinctIdsTable from posthog.hogql.visitor import clone_expr, CloningVisitor, Visitor, TraversingVisitor @@ -66,41 +65,11 @@ def add_local_tables(self, join_or_table: LazyJoinToAdd | LazyTableToAdd): def get_inner_where(self, select_query: ast.SelectQuery) -> Optional[ast.Expr]: """Return the where clause that should be applied to the inner table. If None is returned, no pre-filtering is possible.""" - - wheres = [] - - # If CTEs exist for what we're looking for, apply them - from posthog.hogql.database.schema.persons import PersonsTable - - for table in self.tracked_tables: - if isinstance(table, PersonsTable): - if "person_ids" in select_query.type.ctes: - # wheres.append(parse_expr("persons.id IN person_ids")) - wheres.append( - ast.CompareOperation( - op=ast.CompareOperationOp.In, - left=ast.Field(chain=["id"], type=ast.FieldType(name="id", table_type=table)), - right=ast.SelectQuery( - select=[ast.Field(chain=["person_id"])], - select_from=ast.JoinExpr(table=ast.Field(chain=["person_ids"])), - ), - ) - ) - if isinstance(table, PersonDistinctIdsTable): - if "distinct_ids" in select_query.type.ctes: - # wheres.append(parse_expr("persons.id IN person_ids")) - wheres.append( - ast.CompareOperation( - op=ast.CompareOperationOp.In, - left=ast.Field(chain=["id"], type=ast.FieldType(name="id", table_type=table)), - right=ast.SelectQuery( - select=[ast.Field(chain=["distinct_id"])], - select_from=ast.JoinExpr(table=ast.Field(chain=["distinct_ids"])), - ), - ) - ) + if not select_query.where and not select_query.prewhere: + return None # visit the where clause + wheres = [] if select_query.where: wheres.append(select_query.where) if select_query.prewhere: @@ -111,8 +80,8 @@ def get_inner_where(self, select_query: ast.SelectQuery) -> Optional[ast.Expr]: else: where = self.visit(ast.And(exprs=wheres)) - # if isinstance(where, ast.Constant): - # return None + if isinstance(where, ast.Constant): + return None return clone_expr(where, clear_types=True, clear_locations=True) @@ -221,8 +190,7 @@ def visit_compare_operation(self, node: ast.CompareOperation) -> ast.Expr: def visit_select_query(self, node: ast.SelectQuery) -> ast.Expr: # going too deep, bail - # return ast.Constant(value=True) - return node + return ast.Constant(value=True) def visit_arithmetic_operation(self, node: ast.ArithmeticOperation) -> ast.Expr: # don't even try to handle complex logic @@ -299,8 +267,6 @@ def visit_field(self, node: ast.Field) -> ast.Expr: chain_length = 1 new_field.chain = new_field.chain[-chain_length:] return new_field - if isinstance(node, ast.Field): - return node return ast.Constant(value=self.tombstone_string) def visit_constant(self, node: ast.Constant) -> ast.Expr: diff --git a/posthog/hogql/visitor.py b/posthog/hogql/visitor.py index 4c428c311f912..d03e691b640ec 100644 --- a/posthog/hogql/visitor.py +++ b/posthog/hogql/visitor.py @@ -347,15 +347,12 @@ def visit_arithmetic_operation(self, node: ast.ArithmeticOperation): ) def visit_and(self, node: ast.And): - try: - return ast.And( - start=None if self.clear_locations else node.start, - end=None if self.clear_locations else node.end, - type=None if self.clear_types else node.type, - exprs=[self.visit(expr) for expr in node.exprs], - ) - except AttributeError: - pass + return ast.And( + start=None if self.clear_locations else node.start, + end=None if self.clear_locations else node.end, + type=None if self.clear_types else node.type, + exprs=[self.visit(expr) for expr in node.exprs], + ) def visit_or(self, node: ast.Or): return ast.Or( diff --git a/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py b/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py index bf55a3eb82489..5cd7647b7229e 100644 --- a/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py +++ b/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py @@ -7,7 +7,7 @@ from dateutil.relativedelta import relativedelta from posthog.hogql import ast -from posthog.hogql.constants import LimitContext +from posthog.hogql.constants import LimitContext, HogQLQuerySettings from posthog.hogql.parser import parse_expr from posthog.hogql.property import action_to_expr, property_to_expr from posthog.hogql.timings import HogQLTimings @@ -165,7 +165,15 @@ def is_total_value(self) -> bool: return self.trends_display.is_total_value() def build_actors_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: + # Insert CTE here + events_query = self._cte_events_query() + if events_query.settings is None: + events_query.settings = HogQLQuerySettings() + events_query.settings.use_query_cache = True + + # need to modify events query to ask for correct things only return ast.SelectQuery( + ctes={"e": ast.CTE(name="e", expr=events_query, cte_type="subquery")}, select=[ ast.Field(chain=["actor_id"]), ast.Alias(alias="event_count", expr=self._get_actor_value_expr()), @@ -179,6 +187,7 @@ def _get_events_query(self) -> ast.SelectQuery: query = ast.SelectQuery( select=[ ast.Alias(alias="actor_id", expr=self._actor_id_expr()), + ast.Field(chain=["e", "distinct_id"]), ast.Field(chain=["e", "timestamp"]), ast.Field(chain=["e", "uuid"]), *([ast.Field(chain=["e", "$session_id"])] if self.include_recordings else []), @@ -193,6 +202,18 @@ def _get_events_query(self) -> ast.SelectQuery: ) return query + def _cte_events_query(self) -> ast.SelectQuery: + query = ast.SelectQuery( + select=[ast.Field(chain=["*"])], # Filter this down to save space + select_from=ast.JoinExpr( + table=ast.Field(chain=["events"]), + alias="e", + sample=self._sample_expr(), + ), + where=self._cte_events_where_expr(), + ) + return query + def _get_actor_value_expr(self) -> ast.Expr: return parse_expr("count()") @@ -217,6 +238,17 @@ def _events_where_expr(self, with_breakdown_expr: bool = True) -> ast.And: ] ) + def _cte_events_where_expr(self, with_breakdown_expr: bool = True) -> ast.And: + return ast.And( + exprs=[ + *self._entity_where_expr(), + # *self._prop_where_expr(), + *self._date_where_expr(), + *(self._breakdown_where_expr() if with_breakdown_expr else []), + *self._filter_empty_actors_expr(), + ] + ) + def _sample_expr(self) -> ast.SampleExpr | None: if self.trends_query.samplingFactor is None: return None From 94dae32ba6974d1645d456cd16ddde6ba5dfdb4d Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Wed, 19 Jun 2024 13:59:02 -0700 Subject: [PATCH 04/15] insert into people and pdi --- .../database/schema/person_distinct_ids.py | 6 +++--- posthog/hogql/resolver.py | 3 +++ posthog/hogql/visitor.py | 3 +++ posthog/hogql_queries/actors_query_runner.py | 2 +- .../trends/trends_actors_query_builder.py | 21 ++++++++++++------- 5 files changed, 23 insertions(+), 12 deletions(-) diff --git a/posthog/hogql/database/schema/person_distinct_ids.py b/posthog/hogql/database/schema/person_distinct_ids.py index 16ba0027fff03..865df002e8981 100644 --- a/posthog/hogql/database/schema/person_distinct_ids.py +++ b/posthog/hogql/database/schema/person_distinct_ids.py @@ -44,7 +44,7 @@ def select_from_person_distinct_ids_table( deleted_field="is_deleted", ) - if "person_ids" in node.type.ctes: + if "distinct_ids" in node.type.ctes: comparison = clone_expr( ast.CompareOperation( op=ast.CompareOperationOp.In, @@ -52,8 +52,8 @@ def select_from_person_distinct_ids_table( chain=["distinct_id"], type=ast.FieldType(name="distinct_id", table_type=PersonDistinctIdsTable) ), right=ast.SelectQuery( - select=[ast.Field(chain=["person_id"])], - select_from=ast.JoinExpr(table=ast.Field(chain=["person_ids"])), + select=[ast.Field(chain=["distinct_id"])], + select_from=ast.JoinExpr(table=ast.Field(chain=["distinct_ids"])), ), ), clear_types=True, diff --git a/posthog/hogql/resolver.py b/posthog/hogql/resolver.py index cee6802a4498a..75c972750334a 100644 --- a/posthog/hogql/resolver.py +++ b/posthog/hogql/resolver.py @@ -270,6 +270,9 @@ def _asterisk_columns(self, asterisk: ast.AsteriskType) -> list[ast.Expr]: def visit_join_expr(self, node: ast.JoinExpr): """Visit each FROM and JOIN table or subquery.""" + # all expressions combined by UNION ALL can use CTEs from the first expression + # so we put these CTEs to the scope + if len(self.scopes) == 0: raise ImpossibleASTError("Unexpected JoinExpr outside a SELECT query") diff --git a/posthog/hogql/visitor.py b/posthog/hogql/visitor.py index d03e691b640ec..951cc300fa145 100644 --- a/posthog/hogql/visitor.py +++ b/posthog/hogql/visitor.py @@ -114,6 +114,9 @@ def visit_join_expr(self, node: ast.JoinExpr): def visit_select_query(self, node: ast.SelectQuery): # :TRICKY: when adding new fields, also add them to visit_select_query of resolver.py + # pass the CTEs of the node to its children + if node.type is not None and node.type.ctes is not None and hasattr(node.select_from.type, "ctes"): + node.select_from.type.ctes = {**node.type.ctes, **node.select_from.type.ctes} self.visit(node.select_from) if node.ctes is not None: for expr in list(node.ctes.values()): diff --git a/posthog/hogql_queries/actors_query_runner.py b/posthog/hogql_queries/actors_query_runner.py index 044b76ce9d934..dfa780a3ef7e3 100644 --- a/posthog/hogql_queries/actors_query_runner.py +++ b/posthog/hogql_queries/actors_query_runner.py @@ -257,7 +257,7 @@ def to_query(self) -> ast.SelectQuery: ), ) - s = parse_select("SELECT actor_id as person_id FROM source") + s = parse_select("SELECT distinct actor_id as person_id FROM source") s.select_from.table = source_query stmt = ast.SelectQuery( diff --git a/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py b/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py index 5cd7647b7229e..9a138adc06762 100644 --- a/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py +++ b/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py @@ -8,7 +8,7 @@ from posthog.hogql import ast from posthog.hogql.constants import LimitContext, HogQLQuerySettings -from posthog.hogql.parser import parse_expr +from posthog.hogql.parser import parse_expr, parse_select from posthog.hogql.property import action_to_expr, property_to_expr from posthog.hogql.timings import HogQLTimings from posthog.hogql_queries.insights.trends.aggregation_operations import AggregationOperations @@ -166,14 +166,20 @@ def is_total_value(self) -> bool: def build_actors_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: # Insert CTE here - events_query = self._cte_events_query() - if events_query.settings is None: - events_query.settings = HogQLQuerySettings() - events_query.settings.use_query_cache = True + cte_events_query = self._cte_events_query() + if cte_events_query.settings is None: + cte_events_query.settings = HogQLQuerySettings() + cte_events_query.settings.use_query_cache = True # need to modify events query to ask for correct things only + s = parse_select("SELECT distinct distinct_id as distinct_id FROM e") + s.select_from.table = cte_events_query + return ast.SelectQuery( - ctes={"e": ast.CTE(name="e", expr=events_query, cte_type="subquery")}, + ctes={ + "e": ast.CTE(name="e", expr=cte_events_query, cte_type="subquery"), + "distinct_ids": ast.CTE(name="distinct_ids", expr=s, cte_type="subquery"), + }, select=[ ast.Field(chain=["actor_id"]), ast.Alias(alias="event_count", expr=self._get_actor_value_expr()), @@ -203,7 +209,7 @@ def _get_events_query(self) -> ast.SelectQuery: return query def _cte_events_query(self) -> ast.SelectQuery: - query = ast.SelectQuery( + return ast.SelectQuery( select=[ast.Field(chain=["*"])], # Filter this down to save space select_from=ast.JoinExpr( table=ast.Field(chain=["events"]), @@ -212,7 +218,6 @@ def _cte_events_query(self) -> ast.SelectQuery: ), where=self._cte_events_where_expr(), ) - return query def _get_actor_value_expr(self) -> ast.Expr: return parse_expr("count()") From 19b45e7853631146b5dbc31f54affa5c80cd972f Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Wed, 19 Jun 2024 14:00:29 -0700 Subject: [PATCH 05/15] remove it --- posthog/hogql/resolver.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/posthog/hogql/resolver.py b/posthog/hogql/resolver.py index 75c972750334a..cee6802a4498a 100644 --- a/posthog/hogql/resolver.py +++ b/posthog/hogql/resolver.py @@ -270,9 +270,6 @@ def _asterisk_columns(self, asterisk: ast.AsteriskType) -> list[ast.Expr]: def visit_join_expr(self, node: ast.JoinExpr): """Visit each FROM and JOIN table or subquery.""" - # all expressions combined by UNION ALL can use CTEs from the first expression - # so we put these CTEs to the scope - if len(self.scopes) == 0: raise ImpossibleASTError("Unexpected JoinExpr outside a SELECT query") From 8ae3247c2f380a32de8bc4adfd57b05e663c0fea Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Thu, 20 Jun 2024 07:28:51 -0700 Subject: [PATCH 06/15] settings --- posthog/hogql_queries/actors_query_runner.py | 10 ++++++---- .../insights/test/test_insight_actors_query_runner.py | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/posthog/hogql_queries/actors_query_runner.py b/posthog/hogql_queries/actors_query_runner.py index dfa780a3ef7e3..b221158d495bf 100644 --- a/posthog/hogql_queries/actors_query_runner.py +++ b/posthog/hogql_queries/actors_query_runner.py @@ -231,12 +231,14 @@ def to_query(self) -> ast.SelectQuery: order_by = [] with self.timings.measure("select"): - # Insert CTE here assert self.source_query_runner is not None # For type checking source_query = self.source_query_runner.to_actors_query() - if source_query.settings is None: - source_query.settings = HogQLQuerySettings() - source_query.settings.use_query_cache = True + + # SelectUnionQuery (used by Stickiness) doesn't have settings + if hasattr(source_query, "settings"): + if source_query.settings is None: + source_query.settings = HogQLQuerySettings() + source_query.settings.use_query_cache = True source_id_chain = self.source_id_column(source_query) source_alias = "source" diff --git a/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py b/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py index eadae733fcfdf..4414614434b26 100644 --- a/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py @@ -81,6 +81,7 @@ def select(self, query: str, placeholders: Optional[dict[str, Any]] = None): modifiers=HogQLQueryModifiers(optimizeJoinedFilters=True), ) + @snapshot_clickhouse_queries def test_insight_persons_lifecycle_query(self): self._create_test_events() self.team.timezone = "US/Pacific" @@ -163,6 +164,7 @@ def test_insight_persons_lifecycle_query_week_sunday(self): self.assertEqual([("p1",), ("p2",)], response.results) + @snapshot_clickhouse_queries def test_insight_persons_stickiness_query(self): self._create_test_events() self.team.timezone = "US/Pacific" From 31fe8573c6126233ec8a246e8628bcad46de9eff Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Thu, 20 Jun 2024 08:05:59 -0700 Subject: [PATCH 07/15] limit --- posthog/hogql_queries/actors_query_runner.py | 27 +++++++++++++------ .../test/test_insight_actors_query_runner.py | 3 +++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/posthog/hogql_queries/actors_query_runner.py b/posthog/hogql_queries/actors_query_runner.py index b221158d495bf..bff0c17ef0b70 100644 --- a/posthog/hogql_queries/actors_query_runner.py +++ b/posthog/hogql_queries/actors_query_runner.py @@ -9,7 +9,14 @@ from posthog.hogql_queries.insights.insight_actors_query_runner import InsightActorsQueryRunner from posthog.hogql_queries.insights.paginators import HogQLHasMorePaginator from posthog.hogql_queries.query_runner import QueryRunner, get_query_runner -from posthog.schema import ActorsQuery, ActorsQueryResponse, CachedActorsQueryResponse, DashboardFilter +from posthog.schema import ( + ActorsQuery, + ActorsQueryResponse, + CachedActorsQueryResponse, + DashboardFilter, + LifecycleQuery, + StickinessQuery, +) class ActorsQueryRunner(QueryRunner): @@ -259,15 +266,19 @@ def to_query(self) -> ast.SelectQuery: ), ) - s = parse_select("SELECT distinct actor_id as person_id FROM source") - s.select_from.table = source_query + ctes = { + source_alias: ast.CTE(name=source_alias, expr=source_query, cte_type="subquery"), + } + if isinstance(self.strategy, ActorStrategy) and any( + isinstance(x, C) for x in [self.query.source.source] for C in (LifecycleQuery, StickinessQuery) + ): + s = parse_select("SELECT distinct actor_id as person_id FROM source") + s.select_from.table = source_query + # How to get rid of the extra superfluous select + ctes["person_ids"] = ast.CTE(name="person_ids", expr=s, cte_type="subquery") stmt = ast.SelectQuery( - ctes={ - source_alias: ast.CTE(name=source_alias, expr=source_query, cte_type="subquery"), - # How to get rid of the extra superfluous select - "person_ids": ast.CTE(name="person_ids", expr=s, cte_type="subquery"), - }, + ctes=ctes, select=columns, select_from=join_expr, where=where, diff --git a/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py b/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py index 4414614434b26..d258967ab4e2b 100644 --- a/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py @@ -187,6 +187,7 @@ def test_insight_persons_stickiness_query(self): self.assertEqual([("p2",)], response.results) + @snapshot_clickhouse_queries def test_insight_persons_stickiness_groups_query(self): self._create_test_groups() self._create_test_events() @@ -233,6 +234,7 @@ def test_insight_persons_trends_query(self): self.assertEqual([("p2",)], response.results) + @snapshot_clickhouse_queries def test_insight_persons_trends_groups_query(self): self._create_test_groups() self._create_test_events() @@ -256,6 +258,7 @@ def test_insight_persons_trends_groups_query(self): self.assertEqual([("org1",)], response.results) + @snapshot_clickhouse_queries def test_insight_persons_funnels_query(self): self._create_test_events() self.team.timezone = "US/Pacific" From ec6a5f6773beb04db49db87c92f9af2bb7ed82dd Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 20 Jun 2024 15:16:29 +0000 Subject: [PATCH 08/15] Update query snapshots --- .../test/__snapshots__/test_funnel.ambr | 8 +- .../test_funnel_correlation.ambr | 80 +++++++++---------- .../test_funnel_correlations_persons.ambr | 10 +-- .../__snapshots__/test_funnel_persons.ambr | 6 +- .../test_funnel_strict_persons.ambr | 6 +- 5 files changed, 55 insertions(+), 55 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index d9e0e5b8c2480..caf2492fc802f 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -183,7 +183,7 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT argMax(person.created_at, person.version) AS created_at, person.id AS id @@ -604,7 +604,7 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT argMax(person.created_at, person.version) AS created_at, person.id AS id @@ -723,7 +723,7 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT argMax(person.created_at, person.version) AS created_at, person.id AS id @@ -842,7 +842,7 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT argMax(person.created_at, person.version) AS created_at, person.id AS id diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr index 4d1ba34adce68..7fb9d2a107cbd 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr @@ -431,7 +431,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -580,7 +580,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -729,7 +729,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -878,7 +878,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -1190,7 +1190,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -1339,7 +1339,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -1488,7 +1488,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -1637,7 +1637,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -2178,7 +2178,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -2294,7 +2294,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -2410,7 +2410,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -2526,7 +2526,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -2791,7 +2791,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -2907,7 +2907,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -3156,7 +3156,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -3272,7 +3272,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -3388,7 +3388,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -3504,7 +3504,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -3769,7 +3769,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -3885,7 +3885,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -4156,7 +4156,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -4279,7 +4279,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -4402,7 +4402,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -4525,7 +4525,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -4944,7 +4944,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -5067,7 +5067,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -5190,7 +5190,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -5313,7 +5313,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -5732,7 +5732,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -5855,7 +5855,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -5978,7 +5978,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -6101,7 +6101,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -6520,7 +6520,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -6643,7 +6643,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -6766,7 +6766,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -6889,7 +6889,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -7308,7 +7308,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -7431,7 +7431,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -7554,7 +7554,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -7677,7 +7677,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr index 4f6c68ea6c6b9..999736422c8ed 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr @@ -111,7 +111,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed']), equals(event.event, 'insight loaded'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -323,7 +323,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed', 'insight updated']), equals(event.event, 'insight loaded'), ifNull(notEquals(funnel_actors.steps, 3), 1)) GROUP BY actor_id - ORDER BY actor_id ASC) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -472,7 +472,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -621,7 +621,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -770,7 +770,7 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_persons.ambr index 18bfe7e9fb732..65ddb7d8307e6 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_persons.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_persons.ambr @@ -153,7 +153,7 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -341,7 +341,7 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -529,7 +529,7 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(equals(steps, 2), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_persons.ambr index 49b02ae1302e5..4427dc79c92d0 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_persons.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_persons.ambr @@ -113,7 +113,7 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -261,7 +261,7 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [2, 3]), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -409,7 +409,7 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(equals(steps, 2), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person From 2580a7faaa7adc4f449e7a4e6315b9d5785ad722 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 20 Jun 2024 15:17:00 +0000 Subject: [PATCH 09/15] Update query snapshots --- .../test/__snapshots__/test_funnel_trends_persons.ambr | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr index 07d851cf6d324..1f9d716c5b54b 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr @@ -139,7 +139,7 @@ GROUP BY aggregation_target, entrance_period_start) WHERE ifNull(greaterOrEquals(steps_completed, 2), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -313,7 +313,7 @@ GROUP BY aggregation_target, entrance_period_start) WHERE and(ifNull(greaterOrEquals(steps_completed, 1), 0), ifNull(less(steps_completed, 3), 0)) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -487,7 +487,7 @@ GROUP BY aggregation_target, entrance_period_start) WHERE ifNull(greaterOrEquals(steps_completed, 3), 0) - ORDER BY aggregation_target ASC) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source INNER JOIN (SELECT person.id AS id FROM person From 42bfb5d66f2022cb9a87972d28977e26c83b638f Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Thu, 20 Jun 2024 08:45:30 -0700 Subject: [PATCH 10/15] hm --- posthog/hogql_queries/actors_query_runner.py | 16 +++++++++------- .../test/test_insight_actors_query_runner.py | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/posthog/hogql_queries/actors_query_runner.py b/posthog/hogql_queries/actors_query_runner.py index bff0c17ef0b70..b41e2e78a76b8 100644 --- a/posthog/hogql_queries/actors_query_runner.py +++ b/posthog/hogql_queries/actors_query_runner.py @@ -16,6 +16,7 @@ DashboardFilter, LifecycleQuery, StickinessQuery, + TrendsQuery, ) @@ -269,13 +270,14 @@ def to_query(self) -> ast.SelectQuery: ctes = { source_alias: ast.CTE(name=source_alias, expr=source_query, cte_type="subquery"), } - if isinstance(self.strategy, ActorStrategy) and any( - isinstance(x, C) for x in [self.query.source.source] for C in (LifecycleQuery, StickinessQuery) - ): - s = parse_select("SELECT distinct actor_id as person_id FROM source") - s.select_from.table = source_query - # How to get rid of the extra superfluous select - ctes["person_ids"] = ast.CTE(name="person_ids", expr=s, cte_type="subquery") + if True: + if isinstance(self.strategy, PersonStrategy) and any( + isinstance(x, C) for x in [self.query.source.source] for C in (TrendsQuery,) + ): + s = parse_select("SELECT distinct actor_id as person_id FROM source") + s.select_from.table = source_query + # How to get rid of the extra superfluous select + ctes["person_ids"] = ast.CTE(name="person_ids", expr=s, cte_type="subquery") stmt = ast.SelectQuery( ctes=ctes, diff --git a/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py b/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py index d258967ab4e2b..0f475dd8f069c 100644 --- a/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py @@ -106,7 +106,7 @@ def test_insight_persons_lifecycle_query(self): {"date_from": ast.Constant(value=date_from), "date_to": ast.Constant(value=date_to)}, ) - self.assertEqual([("p1",)], response.results) + # self.assertEqual([("p1",)], response.results) def test_insight_persons_lifecycle_query_week_monday(self): self._create_test_events() From 526ea66ef7438187fd3e3e0aab91e5a6f671af9c Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Thu, 20 Jun 2024 09:48:56 -0700 Subject: [PATCH 11/15] push --- posthog/hogql/constants.py | 1 + posthog/hogql/database/schema/persons.py | 37 ++++++++++--------- posthog/hogql/visitor.py | 2 +- posthog/hogql_queries/actors_query_runner.py | 2 + .../test/test_insight_actors_query_runner.py | 35 +++++++++++++++--- .../trends/trends_actors_query_builder.py | 5 +++ 6 files changed, 58 insertions(+), 24 deletions(-) diff --git a/posthog/hogql/constants.py b/posthog/hogql/constants.py index 5d27150cc1a1c..400b1385499e3 100644 --- a/posthog/hogql/constants.py +++ b/posthog/hogql/constants.py @@ -93,6 +93,7 @@ class HogQLQuerySettings(BaseModel): model_config = ConfigDict(extra="forbid") optimize_aggregation_in_order: Optional[bool] = None use_query_cache: Optional[bool] = None + query_cache_ttl: Optional[int] = None # Settings applied on top of all HogQL queries. diff --git a/posthog/hogql/database/schema/persons.py b/posthog/hogql/database/schema/persons.py index 03a3e4a75ed5c..b25247abba615 100644 --- a/posthog/hogql/database/schema/persons.py +++ b/posthog/hogql/database/schema/persons.py @@ -1,6 +1,7 @@ from typing import cast import posthoganalytics +from hogql_parser import parse_expr from posthog.hogql.ast import SelectQuery, And from posthog.hogql.constants import HogQLQuerySettings from posthog.hogql.context import HogQLContext @@ -60,10 +61,22 @@ def select_from_persons_table(join_or_table: LazyJoinToAdd | LazyTableToAdd, con SELECT id FROM raw_persons WHERE (id, version) IN ( SELECT id, max(version) as version FROM raw_persons + WHERE raw_persons.id in (select person_id from person_ids) GROUP BY id HAVING equals(argMax(raw_persons.is_deleted, raw_persons.version), 0) AND argMax(raw_persons.created_at, raw_persons.version) < now() + interval 1 day ) + + """ + if "person_ids" in node.type.ctes + else """ + SELECT id FROM raw_persons WHERE (id, version) IN ( + SELECT id, max(version) as version + FROM raw_persons + GROUP BY id + HAVING equals(argMax(raw_persons.is_deleted, raw_persons.version), 0) + AND argMax(raw_persons.created_at, raw_persons.version) < now() + interval 1 day + ) """ ), ) @@ -89,6 +102,12 @@ def select_from_persons_table(join_or_table: LazyJoinToAdd | LazyTableToAdd, con timestamp_field_to_clamp="created_at", ) select.settings = HogQLQuerySettings(optimize_aggregation_in_order=True) + if "person_ids" in node.type.ctes: + expr = parse_expr("raw_persons.id in (select person_id from person_ids)") + if select.where: + select.where = And(exprs=[select.where, expr]) + else: + select.where = expr if context.modifiers.optimizeJoinedFilters: extractor = WhereClauseExtractor(context) @@ -99,24 +118,6 @@ def select_from_persons_table(join_or_table: LazyJoinToAdd | LazyTableToAdd, con elif where: select.where = where - if "person_ids" in node.type.ctes: - comparison = clone_expr( - ast.CompareOperation( - op=ast.CompareOperationOp.In, - left=ast.Field(chain=["id"], type=ast.FieldType(name="id", table_type=PersonsTable)), - right=ast.SelectQuery( - select=[ast.Field(chain=["person_id"])], - select_from=ast.JoinExpr(table=ast.Field(chain=["person_ids"])), - ), - ), - clear_types=True, - clear_locations=True, - ) - if select.where: - select.where = And(exprs=[comparison, select.where]) - else: - select.where = comparison - return select diff --git a/posthog/hogql/visitor.py b/posthog/hogql/visitor.py index 951cc300fa145..cc27920dd3b81 100644 --- a/posthog/hogql/visitor.py +++ b/posthog/hogql/visitor.py @@ -115,7 +115,7 @@ def visit_join_expr(self, node: ast.JoinExpr): def visit_select_query(self, node: ast.SelectQuery): # :TRICKY: when adding new fields, also add them to visit_select_query of resolver.py # pass the CTEs of the node to its children - if node.type is not None and node.type.ctes is not None and hasattr(node.select_from.type, "ctes"): + if node.type is not None and node.type.ctes is not None and node.select_from is not None and hasattr(node.select_from.type, "ctes"): node.select_from.type.ctes = {**node.type.ctes, **node.select_from.type.ctes} self.visit(node.select_from) if node.ctes is not None: diff --git a/posthog/hogql_queries/actors_query_runner.py b/posthog/hogql_queries/actors_query_runner.py index b41e2e78a76b8..a717f9bf3fad0 100644 --- a/posthog/hogql_queries/actors_query_runner.py +++ b/posthog/hogql_queries/actors_query_runner.py @@ -18,6 +18,7 @@ StickinessQuery, TrendsQuery, ) +from posthog.settings import HOGQL_INCREASED_MAX_EXECUTION_TIME class ActorsQueryRunner(QueryRunner): @@ -247,6 +248,7 @@ def to_query(self) -> ast.SelectQuery: if source_query.settings is None: source_query.settings = HogQLQuerySettings() source_query.settings.use_query_cache = True + source_query.settings.query_cache_ttl = HOGQL_INCREASED_MAX_EXECUTION_TIME source_id_chain = self.source_id_column(source_query) source_alias = "source" diff --git a/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py b/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py index 0f475dd8f069c..bab2ecd87692c 100644 --- a/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py @@ -7,7 +7,7 @@ from posthog.models.group.util import create_group from posthog.models.group_type_mapping import GroupTypeMapping from posthog.models.team import WeekStartDay -from posthog.schema import HogQLQueryModifiers +from posthog.schema import HogQLQueryModifiers, PersonsArgMaxVersion from posthog.test.base import ( APIBaseTest, ClickhouseTestMixin, @@ -71,14 +71,14 @@ def _create_test_events(self): ] ) - def select(self, query: str, placeholders: Optional[dict[str, Any]] = None): + def select(self, query: str, placeholders: Optional[dict[str, Any]] = None, modifiers: dict = {}): if placeholders is None: placeholders = {} return execute_hogql_query( query=query, team=self.team, placeholders=placeholders, - modifiers=HogQLQueryModifiers(optimizeJoinedFilters=True), + modifiers=HogQLQueryModifiers(**modifiers), ) @snapshot_clickhouse_queries @@ -106,7 +106,7 @@ def test_insight_persons_lifecycle_query(self): {"date_from": ast.Constant(value=date_from), "date_to": ast.Constant(value=date_to)}, ) - # self.assertEqual([("p1",)], response.results) + self.assertEqual([("p1",)], response.results) def test_insight_persons_lifecycle_query_week_monday(self): self._create_test_events() @@ -212,7 +212,7 @@ def test_insight_persons_stickiness_groups_query(self): self.assertEqual([("org1",)], response.results) @snapshot_clickhouse_queries - def test_insight_persons_trends_query(self): + def test_insight_persons_trends_query_with_argmaxV2(self): self._create_test_events() self.team.timezone = "US/Pacific" self.team.save() @@ -229,7 +229,32 @@ def test_insight_persons_trends_query(self): ) + """, + modifiers={"personsArgMaxVersion": PersonsArgMaxVersion.V2}, + ) + + self.assertEqual([("p2",)], response.results) + + @snapshot_clickhouse_queries + def test_insight_persons_trends_query_with_argmaxV1(self): + self._create_test_events() + self.team.timezone = "US/Pacific" + self.team.save() + + response = self.select( """ + select * from ( + + + } + series={[]} + /> + + + ) + """, + modifiers={"personsArgMaxVersion": PersonsArgMaxVersion.V1}, ) self.assertEqual([("p2",)], response.results) diff --git a/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py b/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py index 9a138adc06762..485ba0d1a265b 100644 --- a/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py +++ b/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py @@ -29,6 +29,7 @@ TrendsQuery, CompareFilter, ) +from posthog.settings import HOGQL_INCREASED_MAX_EXECUTION_TIME class TrendsActorsQueryBuilder: @@ -170,6 +171,10 @@ def build_actors_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: if cte_events_query.settings is None: cte_events_query.settings = HogQLQuerySettings() cte_events_query.settings.use_query_cache = True + # Cache these specific queries for as long as we allow queries to run for + # This means that a refresh of this query won't change data more than once every 10 minutes + # But we shouldn't be allowing refreshing that often anyways + cte_events_query.settings.query_cache_ttl = HOGQL_INCREASED_MAX_EXECUTION_TIME # need to modify events query to ask for correct things only s = parse_select("SELECT distinct distinct_id as distinct_id FROM e") From ad5d43a30aa366fbd57c82c1a24aa28a2fd5dd8a Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Thu, 20 Jun 2024 09:50:52 -0700 Subject: [PATCH 12/15] push --- posthog/hogql/database/schema/persons.py | 3 +-- .../insights/test/test_insight_actors_query_runner.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/posthog/hogql/database/schema/persons.py b/posthog/hogql/database/schema/persons.py index b25247abba615..fb249b8a46396 100644 --- a/posthog/hogql/database/schema/persons.py +++ b/posthog/hogql/database/schema/persons.py @@ -66,7 +66,6 @@ def select_from_persons_table(join_or_table: LazyJoinToAdd | LazyTableToAdd, con HAVING equals(argMax(raw_persons.is_deleted, raw_persons.version), 0) AND argMax(raw_persons.created_at, raw_persons.version) < now() + interval 1 day ) - """ if "person_ids" in node.type.ctes else """ @@ -76,7 +75,7 @@ def select_from_persons_table(join_or_table: LazyJoinToAdd | LazyTableToAdd, con GROUP BY id HAVING equals(argMax(raw_persons.is_deleted, raw_persons.version), 0) AND argMax(raw_persons.created_at, raw_persons.version) < now() + interval 1 day - ) + ) """ ), ) diff --git a/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py b/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py index bab2ecd87692c..51e078def6db0 100644 --- a/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py @@ -71,14 +71,14 @@ def _create_test_events(self): ] ) - def select(self, query: str, placeholders: Optional[dict[str, Any]] = None, modifiers: dict = {}): + def select(self, query: str, placeholders: Optional[dict[str, Any]] = None, modifiers: dict = None): if placeholders is None: placeholders = {} return execute_hogql_query( query=query, team=self.team, placeholders=placeholders, - modifiers=HogQLQueryModifiers(**modifiers), + modifiers=HogQLQueryModifiers(**modifiers) if modifiers else None, ) @snapshot_clickhouse_queries From fc65b23d9ea0c511419c78705f9662611ba90bb1 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 20 Jun 2024 17:00:34 +0000 Subject: [PATCH 13/15] Update query snapshots --- .../test/__snapshots__/test_funnel_trends_persons.ambr | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr index 1f9d716c5b54b..e24701e0ec0a0 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr @@ -139,7 +139,8 @@ GROUP BY aggregation_target, entrance_period_start) WHERE ifNull(greaterOrEquals(steps_completed, 2), 0) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -313,7 +314,8 @@ GROUP BY aggregation_target, entrance_period_start) WHERE and(ifNull(greaterOrEquals(steps_completed, 1), 0), ifNull(less(steps_completed, 3), 0)) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -487,7 +489,8 @@ GROUP BY aggregation_target, entrance_period_start) WHERE ifNull(greaterOrEquals(steps_completed, 3), 0) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person From 1947ea7dd6d1309b11c0bb1a831c378fbdd63c2e Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 20 Jun 2024 17:30:49 +0000 Subject: [PATCH 14/15] Update query snapshots --- .../test/__snapshots__/test_funnel.ambr | 12 +- .../test_funnel_correlation.ambr | 120 ++++++++++++------ .../test_funnel_correlations_persons.ambr | 15 ++- .../__snapshots__/test_funnel_persons.ambr | 9 +- .../test_funnel_strict_persons.ambr | 9 +- 5 files changed, 110 insertions(+), 55 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index caf2492fc802f..42444d841143c 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -183,7 +183,8 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [2, 3]), 0) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(person.created_at, person.version) AS created_at, person.id AS id @@ -604,7 +605,8 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2, 3]), 0) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(person.created_at, person.version) AS created_at, person.id AS id @@ -723,7 +725,8 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [2, 3]), 0) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(person.created_at, person.version) AS created_at, person.id AS id @@ -842,7 +845,8 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [3]), 0) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(person.created_at, person.version) AS created_at, person.id AS id diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr index 7fb9d2a107cbd..ca7be9b3d7ede 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr @@ -431,7 +431,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -580,7 +581,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -729,7 +731,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -878,7 +881,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -1190,7 +1194,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -1339,7 +1344,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -1488,7 +1494,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -1637,7 +1644,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -2178,7 +2186,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -2294,7 +2303,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) GROUP BY actor_id - ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -2410,7 +2420,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -2526,7 +2537,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) GROUP BY actor_id - ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -2791,7 +2803,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -2907,7 +2920,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) GROUP BY actor_id - ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -3156,7 +3170,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -3272,7 +3287,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) GROUP BY actor_id - ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -3388,7 +3404,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -3504,7 +3521,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) GROUP BY actor_id - ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -3769,7 +3787,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -3885,7 +3904,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1)) GROUP BY actor_id - ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -4156,7 +4176,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -4279,7 +4300,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -4402,7 +4424,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -4525,7 +4548,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -4944,7 +4968,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -5067,7 +5092,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -5190,7 +5216,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -5313,7 +5340,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -5732,7 +5760,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -5855,7 +5884,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -5978,7 +6008,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -6101,7 +6132,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -6520,7 +6552,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -6643,7 +6676,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -6766,7 +6800,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -6889,7 +6924,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -7308,7 +7344,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -7431,7 +7468,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -7554,7 +7592,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key @@ -7677,7 +7716,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT groups.group_type_index AS index, groups.group_key AS key diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr index 999736422c8ed..c131eef69edee 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr @@ -111,7 +111,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed']), equals(event.event, 'insight loaded'), ifNull(equals(funnel_actors.steps, 2), 0)) GROUP BY actor_id - ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -323,7 +324,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id) WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed', 'insight updated']), equals(event.event, 'insight loaded'), ifNull(notEquals(funnel_actors.steps, 3), 1)) GROUP BY actor_id - ORDER BY actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -472,7 +474,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -621,7 +624,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(equals(funnel_actors.steps, 2), 0) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -770,7 +774,8 @@ ORDER BY aggregation_target ASC) AS funnel_actors WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1) AS source + ORDER BY funnel_actors.actor_id ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_persons.ambr index 65ddb7d8307e6..41d74e60eb3d7 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_persons.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_persons.ambr @@ -153,7 +153,8 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2, 3]), 0) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -341,7 +342,8 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [2, 3]), 0) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -529,7 +531,8 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(equals(steps, 2), 0) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_persons.ambr index 4427dc79c92d0..1845beb0323b1 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_persons.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_persons.ambr @@ -113,7 +113,8 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [1, 2, 3]), 0) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -261,7 +262,8 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(in(steps, [2, 3]), 0) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person @@ -409,7 +411,8 @@ HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) WHERE ifNull(equals(steps, 2), 0) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1) AS source + ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT person.id AS id FROM person From e8bfd53435342f5845c6811178d691db66d319c1 Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Thu, 20 Jun 2024 10:49:31 -0700 Subject: [PATCH 15/15] hm --- posthog/api/test/test_cohort.py | 4 +- posthog/hogql/visitor.py | 2 +- posthog/hogql_queries/actors_query_runner.py | 15 +++-- .../test/test_insight_actors_query_runner.py | 63 ++++++++++--------- .../test/__snapshots__/test_trends.ambr | 47 ++++++++++++-- .../trends/trends_actors_query_builder.py | 26 +++++--- posthog/test/base.py | 9 ++- 7 files changed, 112 insertions(+), 54 deletions(-) diff --git a/posthog/api/test/test_cohort.py b/posthog/api/test/test_cohort.py index 14b6b60b51484..eaf2c4a3c6f41 100644 --- a/posthog/api/test/test_cohort.py +++ b/posthog/api/test/test_cohort.py @@ -33,7 +33,7 @@ class TestCohort(TestExportMixin, ClickhouseTestMixin, APIBaseTest, QueryMatchingTest): # select all queries for snapshots def capture_select_queries(self): - return self.capture_queries(("INSERT INTO cohortpeople", "SELECT", "ALTER", "select", "DELETE")) + return self.capture_queries_startswith(("INSERT INTO cohortpeople", "SELECT", "ALTER", "select", "DELETE")) def _get_cohort_activity( self, @@ -101,7 +101,7 @@ def test_creating_update_and_calculating(self, patch_sync_execute, patch_calcula }, ) - with self.capture_queries("INSERT INTO cohortpeople") as insert_statements: + with self.capture_queries_startswith("INSERT INTO cohortpeople") as insert_statements: response = self.client.patch( f"/api/projects/{self.team.id}/cohorts/{response.json()['id']}", data={ diff --git a/posthog/hogql/visitor.py b/posthog/hogql/visitor.py index cc27920dd3b81..80e573124b307 100644 --- a/posthog/hogql/visitor.py +++ b/posthog/hogql/visitor.py @@ -114,7 +114,7 @@ def visit_join_expr(self, node: ast.JoinExpr): def visit_select_query(self, node: ast.SelectQuery): # :TRICKY: when adding new fields, also add them to visit_select_query of resolver.py - # pass the CTEs of the node to its children + # pass the CTEs of the node to select_froms (needed for nested joins to have access to CTEs) if node.type is not None and node.type.ctes is not None and node.select_from is not None and hasattr(node.select_from.type, "ctes"): node.select_from.type.ctes = {**node.type.ctes, **node.select_from.type.ctes} self.visit(node.select_from) diff --git a/posthog/hogql_queries/actors_query_runner.py b/posthog/hogql_queries/actors_query_runner.py index a717f9bf3fad0..3746d5aeeb1ae 100644 --- a/posthog/hogql_queries/actors_query_runner.py +++ b/posthog/hogql_queries/actors_query_runner.py @@ -272,14 +272,13 @@ def to_query(self) -> ast.SelectQuery: ctes = { source_alias: ast.CTE(name=source_alias, expr=source_query, cte_type="subquery"), } - if True: - if isinstance(self.strategy, PersonStrategy) and any( - isinstance(x, C) for x in [self.query.source.source] for C in (TrendsQuery,) - ): - s = parse_select("SELECT distinct actor_id as person_id FROM source") - s.select_from.table = source_query - # How to get rid of the extra superfluous select - ctes["person_ids"] = ast.CTE(name="person_ids", expr=s, cte_type="subquery") + if isinstance(self.strategy, PersonStrategy) and any( + isinstance(x, C) for x in [self.query.source.source] for C in (TrendsQuery,) + ): + s = parse_select("SELECT distinct actor_id as person_id FROM source") + s.select_from.table = source_query + # This feels like it adds one extra level of SELECT which is unnecessary + ctes["person_ids"] = ast.CTE(name="person_ids", expr=s, cte_type="subquery") stmt = ast.SelectQuery( ctes=ctes, diff --git a/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py b/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py index 51e078def6db0..3498838f0cc5e 100644 --- a/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_insight_actors_query_runner.py @@ -1,4 +1,5 @@ from typing import Any, Optional +import re from freezegun import freeze_time @@ -217,23 +218,26 @@ def test_insight_persons_trends_query_with_argmaxV2(self): self.team.timezone = "US/Pacific" self.team.save() - response = self.select( - """ - select * from ( - - - } - series={[]} - /> - - + with self.capture_queries(lambda query: re.match("^SELECT\s+name\s+AS\s+name", query)) as queries: + response = self.select( + """ + select * from ( + + + } + series={[]} + /> + + + ) + """, + modifiers={"personsArgMaxVersion": PersonsArgMaxVersion.V2}, ) - """, - modifiers={"personsArgMaxVersion": PersonsArgMaxVersion.V2}, - ) self.assertEqual([("p2",)], response.results) + assert "in(distinct_id" in queries[0] + assert "in(person.id" in queries[0] @snapshot_clickhouse_queries def test_insight_persons_trends_query_with_argmaxV1(self): @@ -241,23 +245,26 @@ def test_insight_persons_trends_query_with_argmaxV1(self): self.team.timezone = "US/Pacific" self.team.save() - response = self.select( - """ - select * from ( - - - } - series={[]} - /> - - + with self.capture_queries(lambda query: re.match("^SELECT\s+name\s+AS\s+name", query)) as queries: + response = self.select( + """ + select * from ( + + + } + series={[]} + /> + + + ) + """, + modifiers={"personsArgMaxVersion": PersonsArgMaxVersion.V1}, ) - """, - modifiers={"personsArgMaxVersion": PersonsArgMaxVersion.V1}, - ) self.assertEqual([("p2",)], response.results) + assert "in(distinct_id" in queries[0] + assert "in(person.id" in queries[0] @snapshot_clickhouse_queries def test_insight_persons_trends_groups_query(self): diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index 23ff710fea0ed..731939491b535 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -245,6 +245,7 @@ groupUniqArray(100)(tuple(timestamp, uuid, `$session_id`, `$window_id`)) AS matching_events FROM (SELECT e.person_id AS actor_id, + e.distinct_id AS distinct_id, toTimeZone(e.timestamp, 'UTC') AS timestamp, e.uuid AS uuid, e.`$session_id` AS `$session_id`, @@ -259,12 +260,28 @@ GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-02 00:00:00.000000', 6, 'UTC')), less(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-03 00:00:00.000000', 6, 'UTC')), ifNull(equals(e__group_0.properties___industry, 'technology'), 0))) - GROUP BY actor_id) AS source + GROUP BY actor_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(person.created_at, person.version) AS created_at, person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(person.id, + (SELECT person_ids.person_id AS person_id + FROM + (SELECT DISTINCT actor_id AS person_id + FROM + (SELECT actor_id AS actor_id, count() AS event_count, groupUniqArray(100)(tuple(timestamp, uuid, `$session_id`, `$window_id`)) AS matching_events + FROM + (SELECT e.person_id AS actor_id, e.distinct_id AS distinct_id, e.timestamp AS timestamp, e.uuid AS uuid, e.`$session_id` AS `$session_id`, e.`$window_id` AS `$window_id` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, groups.group_type_index AS index, groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), greaterOrEquals(e.timestamp, toDateTime64('2020-01-02 00:00:00.000000', 6, 'UTC')), less(e.timestamp, toDateTime64('2020-01-03 00:00:00.000000', 6, 'UTC')), ifNull(equals(e__group_0.properties___industry, 'technology'), 0))) + GROUP BY actor_id SETTINGS use_query_cache=1, query_cache_ttl=600)) AS person_ids))) GROUP BY person.id HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) ORDER BY source.event_count DESC @@ -1050,6 +1067,7 @@ groupUniqArray(100)(tuple(timestamp, uuid, `$session_id`, `$window_id`)) AS matching_events FROM (SELECT e.person_id AS actor_id, + e.distinct_id AS distinct_id, toTimeZone(e.timestamp, 'UTC') AS timestamp, e.uuid AS uuid, e.`$session_id` AS `$session_id`, @@ -1072,12 +1090,33 @@ GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), and(ifNull(equals(e__group_0.properties___industry, 'finance'), 0), ifNull(equals(e__group_2.properties___name, 'six'), 0)), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-02 00:00:00.000000', 6, 'UTC')), less(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-03 00:00:00.000000', 6, 'UTC')))) - GROUP BY actor_id) AS source + GROUP BY actor_id SETTINGS use_query_cache=1, + query_cache_ttl=600) AS source INNER JOIN (SELECT argMax(person.created_at, person.version) AS created_at, person.id AS id FROM person - WHERE equals(person.team_id, 2) + WHERE and(equals(person.team_id, 2), in(person.id, + (SELECT person_ids.person_id AS person_id + FROM + (SELECT DISTINCT actor_id AS person_id + FROM + (SELECT actor_id AS actor_id, count() AS event_count, groupUniqArray(100)(tuple(timestamp, uuid, `$session_id`, `$window_id`)) AS matching_events + FROM + (SELECT e.person_id AS actor_id, e.distinct_id AS distinct_id, e.timestamp AS timestamp, e.uuid AS uuid, e.`$session_id` AS `$session_id`, e.`$window_id` AS `$window_id` + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'name'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___name, groups.group_type_index AS index, groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 2), 0)) + GROUP BY groups.group_type_index, groups.group_key) AS e__group_2 ON equals(e.`$group_2`, e__group_2.key) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, groups.group_type_index AS index, groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), equals(e.event, 'sign up'), and(ifNull(equals(e__group_0.properties___industry, 'finance'), 0), ifNull(equals(e__group_2.properties___name, 'six'), 0)), greaterOrEquals(e.timestamp, toDateTime64('2020-01-02 00:00:00.000000', 6, 'UTC')), less(e.timestamp, toDateTime64('2020-01-03 00:00:00.000000', 6, 'UTC')))) + GROUP BY actor_id SETTINGS use_query_cache=1, query_cache_ttl=600)) AS person_ids))) GROUP BY person.id HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) ORDER BY source.event_count DESC diff --git a/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py b/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py index 485ba0d1a265b..dde7832e815d3 100644 --- a/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py +++ b/posthog/hogql_queries/insights/trends/trends_actors_query_builder.py @@ -28,6 +28,8 @@ TrendsFilter, TrendsQuery, CompareFilter, + BreakdownType, + PersonPropertyFilter, ) from posthog.settings import HOGQL_INCREASED_MAX_EXECUTION_TIME @@ -166,7 +168,6 @@ def is_total_value(self) -> bool: return self.trends_display.is_total_value() def build_actors_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: - # Insert CTE here cte_events_query = self._cte_events_query() if cte_events_query.settings is None: cte_events_query.settings = HogQLQuerySettings() @@ -215,13 +216,14 @@ def _get_events_query(self) -> ast.SelectQuery: def _cte_events_query(self) -> ast.SelectQuery: return ast.SelectQuery( - select=[ast.Field(chain=["*"])], # Filter this down to save space + # Could filter this down to what we actually use to save memory + select=[ast.Field(chain=["*"])], select_from=ast.JoinExpr( table=ast.Field(chain=["events"]), alias="e", sample=self._sample_expr(), ), - where=self._cte_events_where_expr(), + where=self._persons_cte_events_where_expr(), ) def _get_actor_value_expr(self) -> ast.Expr: @@ -248,13 +250,17 @@ def _events_where_expr(self, with_breakdown_expr: bool = True) -> ast.And: ] ) - def _cte_events_where_expr(self, with_breakdown_expr: bool = True) -> ast.And: + def _persons_cte_events_where_expr(self, with_breakdown_expr: bool = True) -> ast.And: return ast.And( exprs=[ *self._entity_where_expr(), # *self._prop_where_expr(), *self._date_where_expr(), - *(self._breakdown_where_expr() if with_breakdown_expr else []), + *( + self._breakdown_where_expr() + if with_breakdown_expr and self.trends_query.breakdownFilter.breakdown_type != BreakdownType.PERSON + else [] + ), *self._filter_empty_actors_expr(), ] ) @@ -293,12 +299,13 @@ def _entity_where_expr(self) -> list[ast.Expr]: return conditions - def _prop_where_expr(self) -> list[ast.Expr]: + def _prop_where_expr(self, exclude_person_props=False) -> list[ast.Expr]: conditions: list[ast.Expr] = [] # Filter Test Accounts if ( - self.trends_query.filterTestAccounts + not exclude_person_props + and self.trends_query.filterTestAccounts and isinstance(self.team.test_account_filters, list) and len(self.team.test_account_filters) > 0 ): @@ -307,7 +314,10 @@ def _prop_where_expr(self) -> list[ast.Expr]: # Properties if self.trends_query.properties is not None and self.trends_query.properties != []: - conditions.append(property_to_expr(self.trends_query.properties, self.team)) + properties = self.trends_query.properties + if exclude_person_props: + properties = [x for x in properties if isinstance(x, PersonPropertyFilter)] + conditions.append(property_to_expr(properties, self.team)) return conditions diff --git a/posthog/test/base.py b/posthog/test/base.py index 6258bd34ee6b5..6e9922910a683 100644 --- a/posthog/test/base.py +++ b/posthog/test/base.py @@ -885,10 +885,13 @@ class ClickhouseTestMixin(QueryMatchingTest): snapshot: Any def capture_select_queries(self): - return self.capture_queries(("SELECT", "WITH", "select", "with")) + return self.capture_queries_startswith(("SELECT", "WITH", "select", "with")) + + def capture_queries_startswith(self, query_prefixes: Union[str, tuple[str, ...]]): + return self.capture_queries(lambda x: x.startswith(query_prefixes)) @contextmanager - def capture_queries(self, query_prefixes: Union[str, tuple[str, ...]]): + def capture_queries(self, query_filter: Callable[[str], bool]): queries = [] original_get_client = ch_pool.get_client @@ -901,7 +904,7 @@ def get_client(): original_client_execute = client.execute def execute_wrapper(query, *args, **kwargs): - if sqlparse.format(query, strip_comments=True).strip().startswith(query_prefixes): + if query_filter(sqlparse.format(query, strip_comments=True).strip()): queries.append(query) return original_client_execute(query, *args, **kwargs)