From 5ee838e7cfc1f6dcbef3692c74fc1aa2294376af Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Tue, 19 Sep 2023 11:51:00 +0200 Subject: [PATCH] feat(hogql): fix union all and limits (#17518) --- posthog/hogql/metadata.py | 2 ++ posthog/hogql/query.py | 23 ++++++++++++++++------- posthog/hogql/test/test_metadata.py | 11 +++++++++++ posthog/hogql/test/test_query.py | 12 ++++++++++++ 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/posthog/hogql/metadata.py b/posthog/hogql/metadata.py index 5d0a725af47b4..e1fb117c9d8c7 100644 --- a/posthog/hogql/metadata.py +++ b/posthog/hogql/metadata.py @@ -61,6 +61,8 @@ def get_hogql_metadata( def is_valid_view(select_query: ast.SelectQuery | ast.SelectUnionQuery) -> bool: + if not isinstance(select_query, ast.SelectQuery): + return False for field in select_query.select: if not isinstance(field, ast.Alias): return False diff --git a/posthog/hogql/query.py b/posthog/hogql/query.py index 374e40e10b305..5839f2fdd73bd 100644 --- a/posthog/hogql/query.py +++ b/posthog/hogql/query.py @@ -32,7 +32,7 @@ def execute_hogql_query( timings = HogQLTimings() with timings.measure("query"): - if isinstance(query, ast.SelectQuery): + if isinstance(query, ast.SelectQuery) or isinstance(query, ast.SelectUnionQuery): select_query = query query = None else: @@ -57,12 +57,16 @@ def execute_hogql_query( ) select_query = replace_placeholders(select_query, placeholders) - if select_query.limit is None: - with timings.measure("max_limit"): - # One more "max" of MAX_SELECT_RETURNED_ROWS (100k) in applied in the query printer, overriding this if higher. - from posthog.hogql.constants import DEFAULT_RETURNED_ROWS + with timings.measure("max_limit"): + from posthog.hogql.constants import DEFAULT_RETURNED_ROWS - select_query.limit = ast.Constant(value=default_limit or DEFAULT_RETURNED_ROWS) + select_queries = ( + select_query.select_queries if isinstance(select_query, ast.SelectUnionQuery) else [select_query] + ) + for one_query in select_queries: + if one_query.limit is None: + # One more "max" of MAX_SELECT_RETURNED_ROWS (100k) in applied in the query printer. + one_query.limit = ast.Constant(value=default_limit or DEFAULT_RETURNED_ROWS) # Get printed HogQL query, and returned columns. Using a cloned query. with timings.measure("hogql"): @@ -83,7 +87,12 @@ def execute_hogql_query( with timings.measure("print_ast"): hogql = print_prepared_ast(select_query_hogql, hogql_query_context, "hogql") print_columns = [] - for node in select_query_hogql.select: + columns_query = ( + select_query_hogql.select_queries[0] + if isinstance(select_query_hogql, ast.SelectUnionQuery) + else select_query_hogql + ) + for node in columns_query.select: if isinstance(node, ast.Alias): print_columns.append(node.alias) else: diff --git a/posthog/hogql/test/test_metadata.py b/posthog/hogql/test/test_metadata.py index 54dbcb17a9eb6..bdd5cbac97772 100644 --- a/posthog/hogql/test/test_metadata.py +++ b/posthog/hogql/test/test_metadata.py @@ -233,3 +233,14 @@ def test_valid_view_nested_view(self): "errors": [], }, ) + + def test_union_all_does_not_crash(self): + metadata = self._select("SELECT events.event FROM events UNION ALL SELECT events.event FROM events WHERE 1 = 2") + self.assertEqual( + metadata.dict(), + metadata.dict() + | { + "isValid": True, + "errors": [], + }, + ) diff --git a/posthog/hogql/test/test_query.py b/posthog/hogql/test/test_query.py index d45a1edcb9672..55769b4fea152 100644 --- a/posthog/hogql/test/test_query.py +++ b/posthog/hogql/test/test_query.py @@ -1509,3 +1509,15 @@ def test_hogql_query_filters_alias(self): f"SELECT e.event, e.distinct_id FROM events AS e WHERE and(equals(e.team_id, {self.team.pk}), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, %(hogql_val_0)s), ''), 'null'), '^\"|\"$', ''), %(hogql_val_1)s), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=True", ) self.assertEqual(len(response.results), 2) + + def test_hogql_union_all_limits(self): + query = "SELECT event FROM events UNION ALL SELECT event FROM events" + response = execute_hogql_query(query, team=self.team) + self.assertEqual( + response.hogql, + f"SELECT event FROM events LIMIT 100 UNION ALL SELECT event FROM events LIMIT 100", + ) + self.assertEqual( + response.clickhouse, + f"SELECT events.event FROM events WHERE equals(events.team_id, {self.team.pk}) LIMIT 100 UNION ALL SELECT events.event FROM events WHERE equals(events.team_id, {self.team.pk}) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=True", + )