From 7b08365f8c27589a804fd9ef1d8aed1f921d3694 Mon Sep 17 00:00:00 2001 From: timgl Date: Fri, 25 Oct 2024 16:28:28 +0100 Subject: [PATCH] fix --- posthog/hogql/database/schema/persons.py | 6 ++- .../test/__snapshots__/test_persons.ambr | 53 ------------------- .../database/schema/test/test_persons.py | 10 ++-- 3 files changed, 10 insertions(+), 59 deletions(-) diff --git a/posthog/hogql/database/schema/persons.py b/posthog/hogql/database/schema/persons.py index a7ebf84716278..62aa34cbefdd3 100644 --- a/posthog/hogql/database/schema/persons.py +++ b/posthog/hogql/database/schema/persons.py @@ -64,7 +64,11 @@ def select_from_persons_table( and_conditions.append(filter) # For now, only do this optimization for directly querying the persons table (without joins or as part of a subquery) to avoid knock-on effects to insight queries - if hasattr(node.select_from.type, "table") and isinstance(node.select_from.type.table, PersonsTable): + if ( + node.select_from + and getattr(node.select_from.type, "table", False) + and isinstance(node.select_from.type.table, PersonsTable) + ): extractor = WhereClauseExtractor(context) extractor.add_local_tables(join_or_table) where = extractor.get_inner_where(node) diff --git a/posthog/hogql/database/schema/test/__snapshots__/test_persons.ambr b/posthog/hogql/database/schema/test/__snapshots__/test_persons.ambr index 3287699df0853..90d7cb108ef8d 100644 --- a/posthog/hogql/database/schema/test/__snapshots__/test_persons.ambr +++ b/posthog/hogql/database/schema/test/__snapshots__/test_persons.ambr @@ -1,57 +1,4 @@ # serializer version: 1 -# name: TestPersonOptimization.test_funnel_whatever - ''' - SELECT persons.id AS id, - persons.created_at AS created_at, - source.event_count AS event_count - FROM - (SELECT actor_id AS actor_id, - count() AS event_count - FROM - (SELECT e__pdi.person_id AS actor_id, - toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.uuid AS uuid - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(timestamp, toDateTime64('2024-01-01 00:00:00.000000', 6, 'UTC')), less(timestamp, toDateTime64('2024-01-02 00:00:00.000000', 6, 'UTC')), equals(e.event, '$pageview'))) - GROUP BY actor_id) AS source - INNER JOIN - (SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at, - person.id AS id - FROM person - WHERE and(equals(person.team_id, 2), in(id, - (SELECT source.actor_id AS actor_id - FROM - (SELECT actor_id AS actor_id, count() AS event_count - FROM - (SELECT e__pdi.person_id AS actor_id, toTimeZone(e.timestamp, 'UTC') AS timestamp, e.uuid AS uuid - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(timestamp, toDateTime64('2024-01-01 00:00:00.000000', 6, 'UTC')), less(timestamp, toDateTime64('2024-01-02 00:00:00.000000', 6, 'UTC')), equals(e.event, '$pageview'))) - GROUP BY actor_id) AS source))) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY source.event_count DESC - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- # name: TestPersonOptimization.test_joins_are_left_alone_for_now ''' SELECT events.uuid AS uuid diff --git a/posthog/hogql/database/schema/test/test_persons.py b/posthog/hogql/database/schema/test/test_persons.py index 66d917aef7b44..75d14954b6ad7 100644 --- a/posthog/hogql/database/schema/test/test_persons.py +++ b/posthog/hogql/database/schema/test/test_persons.py @@ -83,8 +83,8 @@ def test_simple_filter(self): modifiers=self.modifiers, ) assert len(response.results) == 2 - assert "where_optimization" in response.clickhouse - assert "in(tuple(person.id, person.version)" not in response.clickhouse + self.assertIn("where_optimization", response.clickhouse) + self.assertNotIn("in(tuple(person.id, person.version)", response.clickhouse) @snapshot_clickhouse_queries def test_joins_are_left_alone_for_now(self): @@ -94,8 +94,8 @@ def test_joins_are_left_alone_for_now(self): modifiers=self.modifiers, ) assert len(response.results) == 2 - assert "in(tuple(person.id, person.version)" in response.clickhouse - assert "where_optimization" not in response.clickhouse + self.assertIn("in(tuple(person.id, person.version)", response.clickhouse) + self.assertNotIn("where_optimization", response.clickhouse) def test_person_modal_not_optimized_yet(self): source_query = TrendsQuery( @@ -122,4 +122,4 @@ def test_person_modal_not_optimized_yet(self): ) query_runner = ActorsQueryRunner(query=actors_query, team=self.team) response = execute_hogql_query(query_runner.to_query(), self.team, modifiers=self.modifiers) - assert "where_optimization" not in response.clickhouse + self.assertNotIn("where_optimization", response.clickhouse)