From f5e4b17201afea02baddeb5a02f7d1f489747884 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Thu, 21 Mar 2024 19:20:42 +0100 Subject: [PATCH 01/15] fix(insights): Make HogQL queries `limit_context`-aware (#21022) * fix(insights): Make HogQL queries `limit_context`-aware * Add some tests * Update `execute_hogql_query` shape * Fix minor issues * Update mypy-baseline.txt --- mypy-baseline.txt | 1 - posthog/hogql/query.py | 1 + .../funnel_correlation_query_runner.py | 1 + .../insights/funnels/funnels_query_runner.py | 1 + .../insights/insight_actors_query_runner.py | 1 + .../insights/lifecycle_query_runner.py | 1 + posthog/hogql_queries/insights/paginators.py | 3 ++- .../insights/paths_query_runner.py | 1 + .../insights/retention_query_runner.py | 1 + .../insights/stickiness_query_runner.py | 1 + .../insights/test/test_paginators.py | 6 +++-- .../test/test_retention_query_runner.py | 16 ++++++++++-- .../test/test_stickiness_query_runner.py | 14 ++++++++++- .../trends/test/test_trends_query_runner.py | 25 ++++++++++++++++--- .../insights/trends/trends_query_runner.py | 1 + .../sessions_timeline_query_runner.py | 1 + .../web_analytics/test/test_web_overview.py | 23 +++++++++++++++-- .../hogql_queries/web_analytics/top_clicks.py | 1 + .../web_analytics_query_runner.py | 1 + .../web_analytics/web_overview.py | 1 + 20 files changed, 89 insertions(+), 12 deletions(-) diff --git a/mypy-baseline.txt b/mypy-baseline.txt index 180d259fc3c20..f0a464d26c546 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -549,7 +549,6 @@ posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py:0: err posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py:0: error: Item "SelectQuery" of "SelectQuery | SelectUnionQuery | Field | Any | None" has no attribute "chain" [union-attr] posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py:0: error: Item "SelectUnionQuery" of "SelectQuery | SelectUnionQuery | Field | Any | None" has no attribute "chain" [union-attr] posthog/hogql_queries/insights/trends/test/test_aggregation_operations.py:0: error: Item "None" of "SelectQuery | SelectUnionQuery | Field | Any | None" has no attribute "chain" [union-attr] -posthog/hogql_queries/insights/test/test_paginators.py:0: error: Argument 2 to "execute_hogql_query" of "HogQLHasMorePaginator" has incompatible type "SelectQuery | SelectUnionQuery"; expected "SelectQuery" [arg-type] posthog/hogql_queries/insights/test/test_paginators.py:0: error: Value of type "object" is not indexable [index] posthog/hogql_queries/insights/test/test_paginators.py:0: error: Value of type "object" is not indexable [index] posthog/hogql_queries/insights/test/test_paginators.py:0: error: Value of type "object" is not indexable [index] diff --git a/posthog/hogql/query.py b/posthog/hogql/query.py index e7bc3f7984205..69b5656020904 100644 --- a/posthog/hogql/query.py +++ b/posthog/hogql/query.py @@ -28,6 +28,7 @@ def execute_hogql_query( query: Union[str, ast.SelectQuery, ast.SelectUnionQuery], team: Team, + *, query_type: str = "hogql_query", filters: Optional[HogQLFilters] = None, placeholders: Optional[Dict[str, ast.Expr]] = None, diff --git a/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py b/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py index 2fef78a372324..bcb362ff3d4f9 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py +++ b/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py @@ 
-236,6 +236,7 @@ def _calculate(self) -> tuple[List[EventOddsRatio], bool, str, HogQLQueryRespons team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) assert response.results diff --git a/posthog/hogql_queries/insights/funnels/funnels_query_runner.py b/posthog/hogql_queries/insights/funnels/funnels_query_runner.py index 38e04603f2725..b1ca8dfd6dd54 100644 --- a/posthog/hogql_queries/insights/funnels/funnels_query_runner.py +++ b/posthog/hogql_queries/insights/funnels/funnels_query_runner.py @@ -92,6 +92,7 @@ def calculate(self): team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) results = self.funnel_class._format_results(response.results) diff --git a/posthog/hogql_queries/insights/insight_actors_query_runner.py b/posthog/hogql_queries/insights/insight_actors_query_runner.py index a0e38abae1776..d58f36cb6f7ee 100644 --- a/posthog/hogql_queries/insights/insight_actors_query_runner.py +++ b/posthog/hogql_queries/insights/insight_actors_query_runner.py @@ -102,6 +102,7 @@ def calculate(self) -> HogQLQueryResponse: team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) def _is_stale(self, cached_result_package): diff --git a/posthog/hogql_queries/insights/lifecycle_query_runner.py b/posthog/hogql_queries/insights/lifecycle_query_runner.py index 24bbe36f1c6bf..62968f5349e0e 100644 --- a/posthog/hogql_queries/insights/lifecycle_query_runner.py +++ b/posthog/hogql_queries/insights/lifecycle_query_runner.py @@ -157,6 +157,7 @@ def calculate(self) -> LifecycleQueryResponse: team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) # TODO: can we move the data conversion part into the query as well? 
It would make it easier to swap diff --git a/posthog/hogql_queries/insights/paginators.py b/posthog/hogql_queries/insights/paginators.py index 6dbdb1543b929..0dfda79ced617 100644 --- a/posthog/hogql_queries/insights/paginators.py +++ b/posthog/hogql_queries/insights/paginators.py @@ -54,8 +54,9 @@ def trim_results(self) -> list[Any]: def execute_hogql_query( self, - query_type: str, query: ast.SelectQuery, + *, + query_type: str, **kwargs, ) -> HogQLQueryResponse: self.response = cast( diff --git a/posthog/hogql_queries/insights/paths_query_runner.py b/posthog/hogql_queries/insights/paths_query_runner.py index c10a5a2320207..c454feb8e56ac 100644 --- a/posthog/hogql_queries/insights/paths_query_runner.py +++ b/posthog/hogql_queries/insights/paths_query_runner.py @@ -725,6 +725,7 @@ def calculate(self) -> PathsQueryResponse: team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) response.results = self.validate_results(response.results) diff --git a/posthog/hogql_queries/insights/retention_query_runner.py b/posthog/hogql_queries/insights/retention_query_runner.py index 221cb976757d2..3ac2c5b4b5462 100644 --- a/posthog/hogql_queries/insights/retention_query_runner.py +++ b/posthog/hogql_queries/insights/retention_query_runner.py @@ -313,6 +313,7 @@ def calculate(self) -> RetentionQueryResponse: team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) result_dict = { diff --git a/posthog/hogql_queries/insights/stickiness_query_runner.py b/posthog/hogql_queries/insights/stickiness_query_runner.py index d0b4b65c67f9b..184e3c0af02df 100644 --- a/posthog/hogql_queries/insights/stickiness_query_runner.py +++ b/posthog/hogql_queries/insights/stickiness_query_runner.py @@ -212,6 +212,7 @@ def calculate(self): team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) if response.timings is not None: diff --git a/posthog/hogql_queries/insights/test/test_paginators.py b/posthog/hogql_queries/insights/test/test_paginators.py index ac83efb45b353..6698115c46535 100644 --- a/posthog/hogql_queries/insights/test/test_paginators.py +++ b/posthog/hogql_queries/insights/test/test_paginators.py @@ -1,3 +1,5 @@ +from typing import cast +from posthog.hogql.ast import SelectQuery from posthog.hogql.constants import ( LimitContext, get_default_limit_for_context, @@ -136,8 +138,8 @@ def test_response_params_consistency(self): """Test consistency of response_params method.""" paginator = HogQLHasMorePaginator(limit=5, offset=10) paginator.response = paginator.execute_hogql_query( - "test_query", - parse_select("SELECT * FROM persons"), + cast(SelectQuery, parse_select("SELECT * FROM persons")), + query_type="test_query", team=self.team, ) params = paginator.response_params() diff --git a/posthog/hogql_queries/insights/test/test_retention_query_runner.py b/posthog/hogql_queries/insights/test/test_retention_query_runner.py index 30edb32102f76..04c108dd779f1 100644 --- a/posthog/hogql_queries/insights/test/test_retention_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_retention_query_runner.py @@ -1,3 +1,5 @@ +from typing import Optional +from unittest.mock import MagicMock, patch import uuid from datetime import datetime @@ -6,11 +8,14 @@ from django.test import override_settings from rest_framework import status +from posthog.clickhouse.client.execute import sync_execute from posthog.constants import ( RETENTION_FIRST_TIME, TREND_FILTER_TYPE_ACTIONS, 
TREND_FILTER_TYPE_EVENTS, ) +from posthog.hogql.constants import LimitContext +from posthog.hogql.query import INCREASED_MAX_EXECUTION_TIME from posthog.hogql_queries.insights.retention_query_runner import RetentionQueryRunner from posthog.hogql_queries.actors_query_runner import ActorsQueryRunner from posthog.models import Action, ActionStep @@ -1685,10 +1690,10 @@ def test_day_interval_sampled(self): class TestClickhouseRetentionGroupAggregation(ClickhouseTestMixin, APIBaseTest): - def run_query(self, query): + def run_query(self, query, *, limit_context: Optional[LimitContext] = None): if not query.get("retentionFilter"): query["retentionFilter"] = {} - runner = RetentionQueryRunner(team=self.team, query=query) + runner = RetentionQueryRunner(team=self.team, query=query, limit_context=limit_context) return runner.calculate().model_dump()["results"] def run_actors_query(self, interval, query, select=None, actor="person"): @@ -1920,3 +1925,10 @@ def test_groups_aggregating_person_on_events(self): [1], ], ) + + @patch("posthog.hogql.query.sync_execute", wraps=sync_execute) + def test_limit_is_context_aware(self, mock_sync_execute: MagicMock): + self.run_query(query={}, limit_context=LimitContext.QUERY_ASYNC) + + mock_sync_execute.assert_called_once() + self.assertIn(f" max_execution_time={INCREASED_MAX_EXECUTION_TIME},", mock_sync_execute.call_args[0][0]) diff --git a/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py b/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py index 3de1fb6ce865e..6e25827e6ecba 100644 --- a/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py @@ -1,8 +1,12 @@ from dataclasses import dataclass from typing import Dict, List, Optional, Union +from unittest.mock import MagicMock, patch from django.test import override_settings from freezegun import freeze_time +from posthog.clickhouse.client.execute import sync_execute +from posthog.hogql.constants import LimitContext +from posthog.hogql.query import INCREASED_MAX_EXECUTION_TIME from posthog.hogql_queries.insights.stickiness_query_runner import StickinessQueryRunner from posthog.models.action.action import Action from posthog.models.action_step import ActionStep @@ -197,6 +201,7 @@ def _run_query( properties: Optional[StickinessProperties] = None, filters: Optional[StickinessFilter] = None, filter_test_accounts: Optional[bool] = False, + limit_context: Optional[LimitContext] = None, ): query_series: List[EventsNode | ActionsNode] = [EventsNode(event="$pageview")] if series is None else series query_date_from = date_from or self.default_date_from @@ -211,7 +216,7 @@ def _run_query( stickinessFilter=filters, filterTestAccounts=filter_test_accounts, ) - return StickinessQueryRunner(team=self.team, query=query).calculate() + return StickinessQueryRunner(team=self.team, query=query, limit_context=limit_context).calculate() def test_stickiness_runs(self): self._create_test_events() @@ -580,3 +585,10 @@ def test_hogql_aggregations(self): 1, 0, ] + + @patch("posthog.hogql.query.sync_execute", wraps=sync_execute) + def test_limit_is_context_aware(self, mock_sync_execute: MagicMock): + self._run_query(limit_context=LimitContext.QUERY_ASYNC) + + mock_sync_execute.assert_called_once() + self.assertIn(f" max_execution_time={INCREASED_MAX_EXECUTION_TIME},", mock_sync_execute.call_args[0][0]) diff --git a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py 
b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py index 09a7389e74c7b..433df7c7df23b 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py @@ -1,11 +1,13 @@ from dataclasses import dataclass from typing import Dict, List, Optional -from unittest.mock import patch +from unittest.mock import MagicMock, patch from django.test import override_settings from freezegun import freeze_time +from posthog.clickhouse.client.execute import sync_execute from posthog.hogql import ast -from posthog.hogql.constants import MAX_SELECT_RETURNED_ROWS +from posthog.hogql.constants import MAX_SELECT_RETURNED_ROWS, LimitContext from posthog.hogql.modifiers import create_default_modifiers_for_team +from posthog.hogql.query import INCREASED_MAX_EXECUTION_TIME from posthog.hogql_queries.insights.trends.trends_query_runner import TrendsQueryRunner from posthog.models.cohort.cohort import Cohort from posthog.models.property_definition import PropertyDefinition @@ -175,6 +177,7 @@ def _create_query_runner( breakdown: Optional[BreakdownFilter] = None, filter_test_accounts: Optional[bool] = None, hogql_modifiers: Optional[HogQLQueryModifiers] = None, + limit_context: Optional[LimitContext] = None, ) -> TrendsQueryRunner: query_series: List[EventsNode | ActionsNode] = [EventsNode(event="$pageview")] if series is None else series query = TrendsQuery( @@ -185,7 +188,7 @@ def _create_query_runner( breakdownFilter=breakdown, filterTestAccounts=filter_test_accounts, ) - return TrendsQueryRunner(team=self.team, query=query, modifiers=hogql_modifiers) + return TrendsQueryRunner(team=self.team, query=query, modifiers=hogql_modifiers, limit_context=limit_context) def _run_trends_query( self, @@ -195,8 +198,10 @@ def _run_trends_query( series: Optional[List[EventsNode | ActionsNode]], trends_filters: Optional[TrendsFilter] = None, breakdown: Optional[BreakdownFilter] = None, + *, filter_test_accounts: Optional[bool] = None, hogql_modifiers: Optional[HogQLQueryModifiers] = None, + limit_context: Optional[LimitContext] = None, ): return self._create_query_runner( date_from=date_from, @@ -207,6 +212,7 @@ def _run_trends_query( breakdown=breakdown, filter_test_accounts=filter_test_accounts, hogql_modifiers=hogql_modifiers, + limit_context=limit_context, ).calculate() def test_trends_query_label(self): @@ -1694,3 +1700,16 @@ def test_to_actors_query_options_breakdowns_hogql(self): BreakdownItem(label="Safari", value="Safari"), BreakdownItem(label="Edge", value="Edge"), ] + + @patch("posthog.hogql.query.sync_execute", wraps=sync_execute) + def test_limit_is_context_aware(self, mock_sync_execute: MagicMock): + self._run_trends_query( + "2020-01-09", + "2020-01-20", + IntervalType.day, + [EventsNode(event="$pageview")], + limit_context=LimitContext.QUERY_ASYNC, + ) + + mock_sync_execute.assert_called_once() + self.assertIn(f" max_execution_time={INCREASED_MAX_EXECUTION_TIME},", mock_sync_execute.call_args[0][0]) diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index b698e095e8b09..d66110298ffab 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -292,6 +292,7 @@ def run(index: int, query: ast.SelectQuery | ast.SelectUnionQuery, is_parallel: team=self.team, timings=self.timings, modifiers=self.modifiers, + 
limit_context=self.limit_context, ) timings_matrix[index] = response.timings diff --git a/posthog/hogql_queries/sessions_timeline_query_runner.py b/posthog/hogql_queries/sessions_timeline_query_runner.py index d920ec7cf94fd..cda9433d63efa 100644 --- a/posthog/hogql_queries/sessions_timeline_query_runner.py +++ b/posthog/hogql_queries/sessions_timeline_query_runner.py @@ -135,6 +135,7 @@ def calculate(self) -> SessionsTimelineQueryResponse: query_type="SessionsTimelineQuery", timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) assert query_result.results is not None timeline_entries_map: Dict[str, TimelineEntry] = {} diff --git a/posthog/hogql_queries/web_analytics/test/test_web_overview.py b/posthog/hogql_queries/web_analytics/test/test_web_overview.py index 63a26ffea9233..dcafe660fc72d 100644 --- a/posthog/hogql_queries/web_analytics/test/test_web_overview.py +++ b/posthog/hogql_queries/web_analytics/test/test_web_overview.py @@ -1,6 +1,11 @@ +from typing import Optional +from unittest.mock import MagicMock, patch from freezegun import freeze_time from parameterized import parameterized +from posthog.clickhouse.client.execute import sync_execute +from posthog.hogql.constants import LimitContext +from posthog.hogql.query import INCREASED_MAX_EXECUTION_TIME from posthog.hogql_queries.web_analytics.web_overview import WebOverviewQueryRunner from posthog.schema import WebOverviewQuery, DateRange from posthog.test.base import ( @@ -36,14 +41,21 @@ def _create_events(self, data, event="$pageview"): ) return person_result - def _run_web_overview_query(self, date_from, date_to, use_sessions_table=False, compare=True): + def _run_web_overview_query( + self, + date_from: str, + date_to: str, + use_sessions_table: bool = False, + compare: bool = True, + limit_context: Optional[LimitContext] = None, + ): query = WebOverviewQuery( dateRange=DateRange(date_from=date_from, date_to=date_to), properties=[], compare=compare, useSessionsTable=use_sessions_table, ) - runner = WebOverviewQueryRunner(team=self.team, query=query) + runner = WebOverviewQueryRunner(team=self.team, query=query, limit_context=limit_context) return runner.calculate() @parameterized.expand([(True,), (False,)]) @@ -185,3 +197,10 @@ def test_correctly_counts_pageviews_in_long_running_session(self, use_sessions_t sessions = results[2] self.assertEqual(1, sessions.value) + + @patch("posthog.hogql.query.sync_execute", wraps=sync_execute) + def test_limit_is_context_aware(self, mock_sync_execute: MagicMock): + self._run_web_overview_query("2023-12-01", "2023-12-03", limit_context=LimitContext.QUERY_ASYNC) + + mock_sync_execute.assert_called_once() + self.assertIn(f" max_execution_time={INCREASED_MAX_EXECUTION_TIME},", mock_sync_execute.call_args[0][0]) diff --git a/posthog/hogql_queries/web_analytics/top_clicks.py b/posthog/hogql_queries/web_analytics/top_clicks.py index 3218e68975f7a..192d7b279b704 100644 --- a/posthog/hogql_queries/web_analytics/top_clicks.py +++ b/posthog/hogql_queries/web_analytics/top_clicks.py @@ -51,6 +51,7 @@ def calculate(self): team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) return WebTopClicksQueryResponse( diff --git a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py index da4f98edcbf32..12ef703271c51 100644 --- a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py +++ 
b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py @@ -211,6 +211,7 @@ def _get_or_calculate_sample_ratio(self) -> SamplingRate: query=event_count, team=self.team, timings=self.timings, + limit_context=self.limit_context, ) if not response.results or not response.results[0] or not response.results[0][0]: diff --git a/posthog/hogql_queries/web_analytics/web_overview.py b/posthog/hogql_queries/web_analytics/web_overview.py index 38388315c8f0b..2da015a60ac4e 100644 --- a/posthog/hogql_queries/web_analytics/web_overview.py +++ b/posthog/hogql_queries/web_analytics/web_overview.py @@ -285,6 +285,7 @@ def calculate(self): team=self.team, timings=self.timings, modifiers=self.modifiers, + limit_context=self.limit_context, ) assert response.results From 7ac5d2b7c4f7b9ccb8d051e9deeecba2d20be042 Mon Sep 17 00:00:00 2001 From: Brett Hoerner Date: Thu, 21 Mar 2024 12:41:23 -0600 Subject: [PATCH 02/15] chore(batch-exports): remove log about empty heartbeat (#21075) cleanup(batch-exports): remove log about empty heartbeat --- posthog/temporal/common/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/posthog/temporal/common/utils.py b/posthog/temporal/common/utils.py index 1b61a356dc898..022c8270d7748 100644 --- a/posthog/temporal/common/utils.py +++ b/posthog/temporal/common/utils.py @@ -119,10 +119,9 @@ async def should_resume_from_activity_heartbeat( heartbeat_details = heartbeat_type.from_activity(activity) except EmptyHeartbeatError: - # We don't log this as a warning/error because it's the expected exception when heartbeat is empty. + # We don't log this as it's the expected exception when heartbeat is empty. heartbeat_details = None received = False - logger.debug("Did not receive details from previous activity execution") except NotEnoughHeartbeatValuesError: heartbeat_details = None From 888f2eee7b0b5ab2d8ef424b24e3c00ed850b914 Mon Sep 17 00:00:00 2001 From: Eric Duong Date: Thu, 21 Mar 2024 15:12:05 -0400 Subject: [PATCH 03/15] fix(data-warehouse): make sure taxonomic filter and popup are populated when a data warehouse filter is selected (#21021) * add fix * use props correctly * Update UI snapshots for `chromium` (1) * check null * Update UI snapshots for `chromium` (1) --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- .../DefinitionPopover/DefinitionPopoverContents.tsx | 9 +++++++++ .../src/lib/components/TaxonomicFilter/InfiniteList.tsx | 1 - .../lib/components/TaxonomicFilter/TaxonomicFilter.tsx | 2 ++ .../components/TaxonomicFilter/taxonomicFilterLogic.tsx | 3 ++- frontend/src/lib/components/TaxonomicFilter/types.ts | 2 ++ .../lib/components/TaxonomicPopover/TaxonomicPopover.tsx | 4 ++++ .../ActionFilter/ActionFilterRow/ActionFilterRow.tsx | 1 + 7 files changed, 20 insertions(+), 2 deletions(-) diff --git a/frontend/src/lib/components/DefinitionPopover/DefinitionPopoverContents.tsx b/frontend/src/lib/components/DefinitionPopover/DefinitionPopoverContents.tsx index 1c9e4928ecb22..8be1a72543f44 100644 --- a/frontend/src/lib/components/DefinitionPopover/DefinitionPopoverContents.tsx +++ b/frontend/src/lib/components/DefinitionPopover/DefinitionPopoverContents.tsx @@ -111,8 +111,16 @@ function DefinitionView({ group }: { group: TaxonomicFilterGroup }): JSX.Element } = useValues(definitionPopoverLogic) const { setLocalDefinition } = useActions(definitionPopoverLogic) + const { selectedItemMeta } = useValues(taxonomicFilterLogic) const { selectItem } = useActions(taxonomicFilterLogic) + // Use effect here
to make definition view stateful. TaxonomicFilterLogic won't mount within definitionPopoverLogic + useEffect(() => { + if (selectedItemMeta && definition.name == selectedItemMeta.id) { + setLocalDefinition(selectedItemMeta) + } + }, [definition]) + if (!definition) { return <> } @@ -280,6 +288,7 @@ function DefinitionView({ group }: { group: TaxonomicFilterGroup }): JSX.Element value: column.key, })) const itemValue = localDefinition ? group?.getValue?.(localDefinition) : null + return (
diff --git a/frontend/src/lib/components/TaxonomicFilter/InfiniteList.tsx b/frontend/src/lib/components/TaxonomicFilter/InfiniteList.tsx index eca954d86f94f..8e0237d36f252 100644 --- a/frontend/src/lib/components/TaxonomicFilter/InfiniteList.tsx +++ b/frontend/src/lib/components/TaxonomicFilter/InfiniteList.tsx @@ -173,7 +173,6 @@ export function InfiniteList({ popupAnchorElement }: InfiniteListProps): JSX.Ele const { mouseInteractionsEnabled, activeTab, searchQuery, value, groupType, eventNames } = useValues(taxonomicFilterLogic) const { selectItem } = useActions(taxonomicFilterLogic) - const { isLoading, results, diff --git a/frontend/src/lib/components/TaxonomicFilter/TaxonomicFilter.tsx b/frontend/src/lib/components/TaxonomicFilter/TaxonomicFilter.tsx index bd2f56b8dcfc9..52e99e1e432e6 100644 --- a/frontend/src/lib/components/TaxonomicFilter/TaxonomicFilter.tsx +++ b/frontend/src/lib/components/TaxonomicFilter/TaxonomicFilter.tsx @@ -21,6 +21,7 @@ export function TaxonomicFilter({ taxonomicFilterLogicKey: taxonomicFilterLogicKeyInput, groupType, value, + filter, onChange, onClose, taxonomicGroupTypes, @@ -48,6 +49,7 @@ export function TaxonomicFilter({ taxonomicFilterLogicKey, groupType, value, + filter, onChange, taxonomicGroupTypes, optionsFromProp, diff --git a/frontend/src/lib/components/TaxonomicFilter/taxonomicFilterLogic.tsx b/frontend/src/lib/components/TaxonomicFilter/taxonomicFilterLogic.tsx index 9809e801308f6..b5904b8957056 100644 --- a/frontend/src/lib/components/TaxonomicFilter/taxonomicFilterLogic.tsx +++ b/frontend/src/lib/components/TaxonomicFilter/taxonomicFilterLogic.tsx @@ -141,6 +141,7 @@ export const taxonomicFilterLogic = kea([ ], })), selectors({ + selectedItemMeta: [() => [(_, props) => props.filter], (filter) => filter], taxonomicFilterLogicKey: [ (_, p) => [p.taxonomicFilterLogicKey], (taxonomicFilterLogicKey) => taxonomicFilterLogicKey, @@ -218,7 +219,7 @@ export const taxonomicFilterLogic = kea([ logic: dataWarehouseSceneLogic, value: 'externalTables', getName: (table: DataWarehouseTableType) => table.name, - getValue: (table: DataWarehouseTableType) => table.id, + getValue: (table: DataWarehouseTableType) => table.name, getPopoverHeader: () => 'Data Warehouse Table', getIcon: () => , }, diff --git a/frontend/src/lib/components/TaxonomicFilter/types.ts b/frontend/src/lib/components/TaxonomicFilter/types.ts index cde2e9d678ded..a3edff51c16f0 100644 --- a/frontend/src/lib/components/TaxonomicFilter/types.ts +++ b/frontend/src/lib/components/TaxonomicFilter/types.ts @@ -1,6 +1,7 @@ import Fuse from 'fuse.js' import { BuiltLogic, LogicWrapper } from 'kea' import { DataWarehouseTableType } from 'scenes/data-warehouse/types' +import { LocalFilter } from 'scenes/insights/filters/ActionFilter/entityFilterLogic' import { AnyDataNode, DatabaseSchemaQueryResponseField } from '~/queries/schema' import { @@ -22,6 +23,7 @@ export interface TaxonomicFilterProps { value?: TaxonomicFilterValue onChange?: (group: TaxonomicFilterGroup, value: TaxonomicFilterValue, item: any) => void onClose?: () => void + filter?: LocalFilter taxonomicGroupTypes: TaxonomicFilterGroupType[] taxonomicFilterLogicKey?: string optionsFromProp?: Partial> diff --git a/frontend/src/lib/components/TaxonomicPopover/TaxonomicPopover.tsx b/frontend/src/lib/components/TaxonomicPopover/TaxonomicPopover.tsx index 4fe515646323c..b0e6cfc85a508 100644 --- a/frontend/src/lib/components/TaxonomicPopover/TaxonomicPopover.tsx +++ b/frontend/src/lib/components/TaxonomicPopover/TaxonomicPopover.tsx @@ -4,6 +4,7 @@ 
import { TaxonomicFilterGroupType, TaxonomicFilterValue } from 'lib/components/T import { LemonButton, LemonButtonProps } from 'lib/lemon-ui/LemonButton' import { LemonDropdown } from 'lib/lemon-ui/LemonDropdown' import { useEffect, useState } from 'react' +import { LocalFilter } from 'scenes/insights/filters/ActionFilter/entityFilterLogic' import { AnyDataNode, DatabaseSchemaQueryResponseField } from '~/queries/schema' @@ -13,6 +14,7 @@ export interface TaxonomicPopoverProps void + filter?: LocalFilter groupTypes?: TaxonomicFilterGroupType[] renderValue?: (value: ValueType) => JSX.Element | null eventNames?: string[] @@ -41,6 +43,7 @@ export function TaxonomicStringPopover(props: TaxonomicPopoverProps): JS export function TaxonomicPopover({ groupType, value, + filter, onChange, renderValue, groupTypes, @@ -81,6 +84,7 @@ export function TaxonomicPopover { onChange?.(payload as ValueType, type, item) setVisible(false) diff --git a/frontend/src/scenes/insights/filters/ActionFilter/ActionFilterRow/ActionFilterRow.tsx b/frontend/src/scenes/insights/filters/ActionFilter/ActionFilterRow/ActionFilterRow.tsx index 3f6d31c9489b2..bca5a483baf48 100644 --- a/frontend/src/scenes/insights/filters/ActionFilter/ActionFilterRow/ActionFilterRow.tsx +++ b/frontend/src/scenes/insights/filters/ActionFilter/ActionFilterRow/ActionFilterRow.tsx @@ -228,6 +228,7 @@ export function ActionFilterRow({ fullWidth groupType={filter.type as TaxonomicFilterGroupType} value={getValue(value, filter)} + filter={filter} onChange={(changedValue, taxonomicGroupType, item) => { const groupType = taxonomicFilterGroupTypeToEntityType(taxonomicGroupType) if (groupType === EntityTypes.DATA_WAREHOUSE) { From d35f67ec53d369df51597acea9cecb67b223ded0 Mon Sep 17 00:00:00 2001 From: Tiina Turban Date: Thu, 21 Mar 2024 21:52:15 +0100 Subject: [PATCH 04/15] fix: batch export PG form checkbox initial edit state (#21088) --- .../batch_exports/BatchExportEditForm.tsx | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/frontend/src/scenes/batch_exports/BatchExportEditForm.tsx b/frontend/src/scenes/batch_exports/BatchExportEditForm.tsx index 7fbbc8cc29d69..b015659cfaef1 100644 --- a/frontend/src/scenes/batch_exports/BatchExportEditForm.tsx +++ b/frontend/src/scenes/batch_exports/BatchExportEditForm.tsx @@ -375,17 +375,21 @@ export function BatchExportsEditFields({ - - Does your Postgres instance have a self-signed SSL certificate? - - - - - } - /> + {({ value, onChange }) => ( + + Does your Postgres instance have a self-signed SSL certificate? 
+ + + + + } + checked={!!value} + onChange={onChange} + /> + )} From fa5a1d61029bea66f91f237a47ce9f72808eed1b Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Thu, 21 Mar 2024 23:55:28 +0100 Subject: [PATCH 05/15] feat(insights): string breakdowns (#21023) --- frontend/src/queries/query.ts | 8 +- .../views/InsightsTable/InsightsTable.tsx | 3 +- mypy-baseline.txt | 14 +- package.json | 1 + pnpm-lock.yaml | 8 + .../insights/trends/breakdown.py | 115 +++----- .../insights/trends/breakdown_values.py | 23 +- .../test/__snapshots__/test_trends.ambr | 260 +++++++++--------- .../test_trends_data_warehouse_query.ambr | 16 +- .../insights/trends/test/test_trends.py | 4 +- .../trends/test/test_trends_query_runner.py | 19 +- .../insights/trends/trends_query_builder.py | 10 +- .../insights/trends/trends_query_runner.py | 28 +- 13 files changed, 234 insertions(+), 275 deletions(-) diff --git a/frontend/src/queries/query.ts b/frontend/src/queries/query.ts index 78778cec4322a..f866b2f336d31 100644 --- a/frontend/src/queries/query.ts +++ b/frontend/src/queries/query.ts @@ -220,7 +220,13 @@ export async function query( (hogQLInsightsFunnelsFlagEnabled && isFunnelsQuery(queryNode)) ) { if (hogQLInsightsLiveCompareEnabled) { - const legacyFunction = legacyUrl ? fetchLegacyUrl : fetchLegacyInsights + const legacyFunction = (): any => { + try { + return legacyUrl ? fetchLegacyUrl : fetchLegacyInsights + } catch (e) { + console.error('Error fetching legacy insights', e) + } + } let legacyResponse: any ;[response, legacyResponse] = await Promise.all([ executeQuery(queryNode, methodOptions, refresh, queryId), diff --git a/frontend/src/scenes/insights/views/InsightsTable/InsightsTable.tsx b/frontend/src/scenes/insights/views/InsightsTable/InsightsTable.tsx index d426ca87f525c..c6d037c3d1792 100644 --- a/frontend/src/scenes/insights/views/InsightsTable/InsightsTable.tsx +++ b/frontend/src/scenes/insights/views/InsightsTable/InsightsTable.tsx @@ -3,6 +3,7 @@ import './InsightsTable.scss' import { useActions, useValues } from 'kea' import { getSeriesColor } from 'lib/colors' import { LemonTable, LemonTableColumn } from 'lib/lemon-ui/LemonTable' +import { compare as compareFn } from 'natural-orderby' import { insightLogic } from 'scenes/insights/insightLogic' import { insightSceneLogic } from 'scenes/insights/insightSceneLogic' import { isTrendsFilter } from 'scenes/insights/sharedUtils' @@ -157,7 +158,7 @@ export function InsightsTable({ } const labelA = formatItemBreakdownLabel(a) const labelB = formatItemBreakdownLabel(b) - return labelA.localeCompare(labelB) + return compareFn()(labelA, labelB) }, }) if (isTrends && display === ChartDisplayType.WorldMap) { diff --git a/mypy-baseline.txt b/mypy-baseline.txt index f0a464d26c546..9d5fe48cb5777 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -346,20 +346,12 @@ posthog/hogql_queries/sessions_timeline_query_runner.py:0: error: Statement is u posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr] posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_histogram_bin_count" [union-attr] posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown" [union-attr] 
-posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument 1 to "parse_expr" has incompatible type "str | float | list[str | float] | Any | None"; expected "str" [arg-type] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown" [union-attr] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument 1 to "parse_expr" has incompatible type "str | float | list[str | float] | Any | None"; expected "str" [arg-type] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Statement is unreachable [unreachable] posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument "exprs" to "Or" has incompatible type "list[CompareOperation]"; expected "list[Expr]" [arg-type] posthog/hogql_queries/insights/trends/breakdown.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance posthog/hogql_queries/insights/trends/breakdown.py:0: note: Consider using "Sequence" instead, which is covariant -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Incompatible types in assignment (expression has type "float", variable has type "int") [assignment] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Incompatible types in assignment (expression has type "float", variable has type "int") [assignment] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Incompatible types in assignment (expression has type "str", variable has type "int") [assignment] -posthog/hogql_queries/insights/trends/breakdown.py:0: error: Incompatible types in assignment (expression has type "str", variable has type "int") [assignment] +posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr] +posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown" [union-attr] +posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument 1 to "parse_expr" has incompatible type "str | float | list[str | float] | Any | None"; expected "str" [arg-type] posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr] posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown" [union-attr] posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument "breakdown_field" to "get_properties_chain" has incompatible type "str | float | list[str | float] | Any | None"; expected "str" [arg-type] diff --git a/package.json b/package.json index 770be74997198..79946a087fbf4 100644 --- a/package.json +++ b/package.json @@ -140,6 +140,7 @@ "maplibre-gl": "^3.5.1", "md5": "^2.3.0", "monaco-editor": "^0.39.0", + "natural-orderby": "^3.0.2", "papaparse": "^5.4.1", "pmtiles": "^2.11.0", "postcss": "^8.4.31", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5f157ec8b039e..7fc1d7cbd22a4 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -238,6 +238,9 @@ dependencies: monaco-editor: specifier: ^0.39.0 version: 0.39.0 + natural-orderby: + specifier: ^3.0.2 + version: 3.0.2 
papaparse: specifier: ^5.4.1 version: 5.4.1 @@ -15917,6 +15920,11 @@ packages: resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==} dev: true + /natural-orderby@3.0.2: + resolution: {integrity: sha512-x7ZdOwBxZCEm9MM7+eQCjkrNLrW3rkBKNHVr78zbtqnMGVNlnDi6C/eUEYgxHNrcbu0ymvjzcwIL/6H1iHri9g==} + engines: {node: '>=18'} + dev: false + /needle@3.3.1: resolution: {integrity: sha512-6k0YULvhpw+RoLNiQCRKOl09Rv1dPLr8hHnVjHqdolKwDrdNyk+Hmrthi4lIGPPz3r39dLx0hsF5s40sZ3Us4Q==} engines: {node: '>= 4.4.x'} diff --git a/posthog/hogql_queries/insights/trends/breakdown.py b/posthog/hogql_queries/insights/trends/breakdown.py index 45a3a8421e8d8..bde2cd807b6a7 100644 --- a/posthog/hogql_queries/insights/trends/breakdown.py +++ b/posthog/hogql_queries/insights/trends/breakdown.py @@ -3,9 +3,7 @@ from posthog.hogql.parser import parse_expr from posthog.hogql.timings import HogQLTimings from posthog.hogql_queries.insights.trends.breakdown_values import ( - BREAKDOWN_NULL_NUMERIC_LABEL, BREAKDOWN_NULL_STRING_LABEL, - BREAKDOWN_OTHER_NUMERIC_LABEL, BREAKDOWN_OTHER_STRING_LABEL, BreakdownValues, ) @@ -19,6 +17,10 @@ from posthog.schema import ActionsNode, EventsNode, DataWarehouseNode, HogQLQueryModifiers, InCohortVia, TrendsQuery +def hogql_to_string(expr: ast.Expr) -> ast.Call: + return ast.Call(name="toString", args=[expr]) + + class Breakdown: query: TrendsQuery team: Team @@ -27,7 +29,7 @@ class Breakdown: timings: HogQLTimings modifiers: HogQLQueryModifiers events_filter: ast.Expr - breakdown_values_override: Optional[List[str | int | float]] + breakdown_values_override: Optional[List[str]] def __init__( self, @@ -38,7 +40,7 @@ def __init__( timings: HogQLTimings, modifiers: HogQLQueryModifiers, events_filter: ast.Expr, - breakdown_values_override: Optional[List[str | int | float]] = None, + breakdown_values_override: Optional[List[str]] = None, ): self.team = team self.query = query @@ -70,19 +72,15 @@ def placeholders(self) -> Dict[str, ast.Expr]: return {"cross_join_breakdown_values": ast.Alias(alias="breakdown_value", expr=values)} - def column_expr(self) -> ast.Expr: + def column_expr(self) -> ast.Alias: if self.is_histogram_breakdown: return ast.Alias(alias="breakdown_value", expr=self._get_breakdown_histogram_multi_if()) - elif self.query.breakdownFilter.breakdown_type == "hogql": - return ast.Alias( - alias="breakdown_value", - expr=parse_expr(self.query.breakdownFilter.breakdown), - ) - elif self.query.breakdownFilter.breakdown_type == "cohort": + + if self.query.breakdownFilter.breakdown_type == "cohort": if self.modifiers.inCohortVia == InCohortVia.leftjoin_conjoined: return ast.Alias( alias="breakdown_value", - expr=ast.Field(chain=["__in_cohort", "cohort_id"]), + expr=hogql_to_string(ast.Field(chain=["__in_cohort", "cohort_id"])), ) cohort_breakdown = ( @@ -90,19 +88,9 @@ def column_expr(self) -> ast.Expr: ) return ast.Alias( alias="breakdown_value", - expr=ast.Constant(value=cohort_breakdown), - ) - - if self.query.breakdownFilter.breakdown_type == "hogql": - return ast.Alias( - alias="breakdown_value", - expr=parse_expr(self.query.breakdownFilter.breakdown), + expr=hogql_to_string(ast.Constant(value=cohort_breakdown)), ) - # If there's no breakdown values - if len(self._breakdown_values) == 1 and self._breakdown_values[0] is None: - return ast.Alias(alias="breakdown_value", expr=ast.Field(chain=self._properties_chain)) - return ast.Alias(alias="breakdown_value", expr=self._get_breakdown_transform_func) def 
events_where_filter(self) -> ast.Expr | None: @@ -148,15 +136,14 @@ def events_where_filter(self) -> ast.Expr | None: else: left = ast.Field(chain=self._properties_chain) + if not self.is_histogram_breakdown: + left = hogql_to_string(left) + compare_ops = [] for _value in self._breakdown_values: - value: Optional[str | int | float] = _value + value: Optional[str] = str(_value) # non-cohorts are always strings # If the value is one of the "other" values, then use the `transform()` func - if ( - value == BREAKDOWN_OTHER_STRING_LABEL - or value == BREAKDOWN_OTHER_NUMERIC_LABEL - or value == float(BREAKDOWN_OTHER_NUMERIC_LABEL) - ): + if value == BREAKDOWN_OTHER_STRING_LABEL: transform_func = self._get_breakdown_transform_func compare_ops.append( ast.CompareOperation( @@ -164,11 +151,7 @@ def events_where_filter(self) -> ast.Expr | None: ) ) else: - if ( - value == BREAKDOWN_NULL_STRING_LABEL - or value == BREAKDOWN_NULL_NUMERIC_LABEL - or value == float(BREAKDOWN_NULL_NUMERIC_LABEL) - ): + if value == BREAKDOWN_NULL_STRING_LABEL: value = None compare_ops.append( @@ -184,30 +167,25 @@ def events_where_filter(self) -> ast.Expr | None: @cached_property def _get_breakdown_transform_func(self) -> ast.Call: - values = self._breakdown_values - all_values_are_ints_or_none = all(isinstance(value, int) or value is None for value in values) - all_values_are_floats_or_none = all(isinstance(value, float) or value is None for value in values) - - if all_values_are_ints_or_none: - breakdown_other_value = BREAKDOWN_OTHER_NUMERIC_LABEL - breakdown_null_value = BREAKDOWN_NULL_NUMERIC_LABEL - elif all_values_are_floats_or_none: - breakdown_other_value = float(BREAKDOWN_OTHER_NUMERIC_LABEL) - breakdown_null_value = float(BREAKDOWN_NULL_NUMERIC_LABEL) - else: - breakdown_other_value = BREAKDOWN_OTHER_STRING_LABEL - breakdown_null_value = BREAKDOWN_NULL_STRING_LABEL + if self.query.breakdownFilter.breakdown_type == "hogql": + return self._get_breakdown_values_transform(parse_expr(self.query.breakdownFilter.breakdown)) + return self._get_breakdown_values_transform(ast.Field(chain=self._properties_chain)) + def _get_breakdown_values_transform(self, node: ast.Expr) -> ast.Call: + breakdown_values = self._breakdown_values_ast return ast.Call( name="transform", args=[ ast.Call( name="ifNull", - args=[ast.Field(chain=self._properties_chain), ast.Constant(value=breakdown_null_value)], + args=[ + hogql_to_string(node), + ast.Constant(value=BREAKDOWN_NULL_STRING_LABEL), + ], ), - self._breakdown_values_ast, - self._breakdown_values_ast, - ast.Constant(value=breakdown_other_value), + breakdown_values, + breakdown_values, + ast.Constant(value=BREAKDOWN_OTHER_STRING_LABEL), ], ) @@ -220,15 +198,21 @@ def _breakdown_buckets_ast(self) -> ast.Array: return ast.Array(exprs=list(map(lambda v: ast.Constant(value=v), values))) - @cached_property + @property def _breakdown_values_ast(self) -> ast.Array: - return ast.Array(exprs=[ast.Constant(value=v) for v in self._breakdown_values]) + exprs: list[ast.Expr] = [] + for value in self._breakdown_values: + if isinstance(value, str): + exprs.append(ast.Constant(value=value)) + else: + exprs.append(hogql_to_string(ast.Constant(value=value))) + return ast.Array(exprs=exprs) @cached_property - def _all_breakdown_values(self) -> List[str | int | float | None]: + def _all_breakdown_values(self) -> List[str | int | None]: # Used in the actors query if self.breakdown_values_override is not None: - return cast(List[str | int | float | None], self.breakdown_values_override) + return cast(List[str 
| int | None], self.breakdown_values_override) if self.query.breakdownFilter is None: return [] @@ -243,25 +227,12 @@ def _all_breakdown_values(self) -> List[str | int | float | None]: query_date_range=self.query_date_range, modifiers=self.modifiers, ) - return cast(List[str | int | float | None], breakdown.get_breakdown_values()) + return cast(List[str | int | None], breakdown.get_breakdown_values()) @cached_property - def _breakdown_values(self) -> List[str | int | float]: - values = self._all_breakdown_values - if len(values) == 0 or all(value is None for value in values): - return [] - - if None in values: - all_values_are_ints_or_none = all(isinstance(value, int) or value is None for value in values) - all_values_are_floats_or_none = all(isinstance(value, float) or value is None for value in values) - - if all_values_are_ints_or_none: - values = [v if v is not None else BREAKDOWN_NULL_NUMERIC_LABEL for v in values] - elif all_values_are_floats_or_none: - values = [v if v is not None else float(BREAKDOWN_NULL_NUMERIC_LABEL) for v in values] - else: - values = [v if v is not None else BREAKDOWN_NULL_STRING_LABEL for v in values] - return cast(List[str | int | float], values) + def _breakdown_values(self) -> List[str | int]: + values = [BREAKDOWN_NULL_STRING_LABEL if v is None else v for v in self._all_breakdown_values] + return cast(List[str | int], values) @cached_property def has_breakdown_values(self) -> bool: diff --git a/posthog/hogql_queries/insights/trends/breakdown_values.py b/posthog/hogql_queries/insights/trends/breakdown_values.py index 7b1522d5f25c5..d9ab11891f210 100644 --- a/posthog/hogql_queries/insights/trends/breakdown_values.py +++ b/posthog/hogql_queries/insights/trends/breakdown_values.py @@ -97,6 +97,9 @@ def get_breakdown_values(self) -> List[str | int]: ), ) + if not self.histogram_bin_count: + select_field.expr = ast.Call(name="toString", args=[select_field.expr]) + if self.chart_display_type == ChartDisplayType.WorldMap: breakdown_limit = BREAKDOWN_VALUES_LIMIT_FOR_COUNTRIES else: @@ -211,23 +214,9 @@ def get_breakdown_values(self) -> List[str | int]: # Add "other" value if "other" is not hidden and we're not bucketing numeric values if self.hide_other_aggregation is not True and self.histogram_bin_count is None: - all_values_are_ints_or_none = all(isinstance(value, int) or value is None for value in values) - all_values_are_floats_or_none = all(isinstance(value, float) or value is None for value in values) - all_values_are_string_or_none = all(isinstance(value, str) or value is None for value in values) - - if all_values_are_string_or_none: - values = [BREAKDOWN_NULL_STRING_LABEL if value in (None, "") else value for value in values] - if needs_other: - values.insert(0, BREAKDOWN_OTHER_STRING_LABEL) - elif all_values_are_ints_or_none or all_values_are_floats_or_none: - if all_values_are_ints_or_none: - values = [BREAKDOWN_NULL_NUMERIC_LABEL if value is None else value for value in values] - if needs_other: - values.insert(0, BREAKDOWN_OTHER_NUMERIC_LABEL) - else: - values = [float(BREAKDOWN_NULL_NUMERIC_LABEL) if value is None else value for value in values] - if needs_other: - values.insert(0, float(BREAKDOWN_OTHER_NUMERIC_LABEL)) + values = [BREAKDOWN_NULL_STRING_LABEL if value in (None, "") else value for value in values] + if needs_other: + values = [BREAKDOWN_OTHER_STRING_LABEL] + values if len(values) == 0: values.insert(0, None) diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr 
b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index f6eb3748afb2b..e2ec22fb9fb1c 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -187,7 +187,7 @@ # --- # name: TestTrends.test_breakdown_by_group_props_person_on_events ''' - SELECT e__group_0.properties___industry AS value, + SELECT toString(e__group_0.properties___industry) AS value, count(e.uuid) AS count FROM events AS e LEFT JOIN @@ -210,7 +210,7 @@ ''' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - ifNull(toString(breakdown_value), '') AS breakdown_value + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value FROM (SELECT sum(total) AS count, day_start AS day_start, @@ -231,7 +231,7 @@ ORDER BY sec.breakdown_value ASC, day_start ASC UNION ALL SELECT count(e.uuid) AS total, toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(e__group_0.properties___industry, '$$_posthog_breakdown_null_$$'), ['finance', 'technology'], ['finance', 'technology'], '$$_posthog_breakdown_other_$$') AS breakdown_value + transform(ifNull(toString(e__group_0.properties___industry), '$$_posthog_breakdown_null_$$'), ['finance', 'technology'], ['finance', 'technology'], '$$_posthog_breakdown_other_$$') AS breakdown_value FROM events AS e SAMPLE 1 LEFT JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, @@ -241,7 +241,7 @@ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(e__group_0.properties___industry, 'finance'), 0), ifNull(equals(e__group_0.properties___industry, 'technology'), 0))) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(toString(e__group_0.properties___industry), 'finance'), 0), ifNull(equals(toString(e__group_0.properties___industry), 'technology'), 0))) GROUP BY day_start, breakdown_value) GROUP BY day_start, @@ -287,7 +287,7 @@ # --- # name: TestTrends.test_breakdown_by_group_props_with_person_filter_person_on_events ''' - SELECT e__group_0.properties___industry AS value, + SELECT toString(e__group_0.properties___industry) AS value, count(e.uuid) AS count FROM events AS e LEFT JOIN @@ -310,7 +310,7 @@ ''' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - ifNull(toString(breakdown_value), '') AS breakdown_value + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value FROM (SELECT sum(total) AS count, day_start AS day_start, @@ -331,7 +331,7 @@ ORDER BY sec.breakdown_value ASC, day_start ASC UNION ALL SELECT count(e.uuid) AS total, toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - 
transform(ifNull(e__group_0.properties___industry, '$$_posthog_breakdown_null_$$'), ['finance'], ['finance'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+          transform(ifNull(toString(e__group_0.properties___industry), '$$_posthog_breakdown_null_$$'), ['finance'], ['finance'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   LEFT JOIN
   (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
@@ -341,7 +341,7 @@
   WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
   GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'key'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(e__group_0.properties___industry, 'finance'), 0))
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'key'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(toString(e__group_0.properties___industry), 'finance'), 0))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -356,7 +356,7 @@
 # ---
 # name: TestTrends.test_breakdown_filtering_with_properties_in_new_format
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')) AS value,
          count(e.uuid) AS count
   FROM events AS e
   WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0)))
@@ -371,7 +371,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
           day_start AS day_start,
@@ -392,9 +392,9 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(e.uuid) AS total,
                    toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
-                   transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['second url'], ['second url'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['second url'], ['second url'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), 'second url'), 0))
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')), 'second url'), 0))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -409,7 +409,7 @@
 # ---
 # name: TestTrends.test_breakdown_filtering_with_properties_in_new_format.2
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')) AS value,
          count(e.uuid) AS count
   FROM events AS e
   WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0)))
@@ -423,24 +423,38 @@
 # name: TestTrends.test_breakdown_filtering_with_properties_in_new_format.3
   '''
   SELECT groupArray(day_start) AS date,
-         groupArray(count) AS total
+         groupArray(count) AS total,
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
-          day_start AS day_start
+          day_start AS day_start,
+          breakdown_value AS breakdown_value
   FROM
   (SELECT 0 AS total,
-          minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start
-  FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers
-  UNION ALL SELECT 0 AS total,
-                   toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))) AS day_start
+          ticks.day_start AS day_start,
+          sec.breakdown_value AS breakdown_value
+  FROM
+  (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start
+   FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers
+   UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))) AS day_start) AS ticks
+  CROSS JOIN
+  (SELECT breakdown_value
+   FROM
+   (SELECT ['$$_posthog_breakdown_null_$$'] AS breakdown_value) ARRAY
+   JOIN breakdown_value AS breakdown_value) AS sec
+  ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(e.uuid) AS total,
-                   toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start
+                   toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
+                   transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$'], ['$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))
-  GROUP BY day_start)
-  GROUP BY day_start
-  ORDER BY day_start ASC)
-  ORDER BY sum(count) DESC
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0), isNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''))))
+  GROUP BY day_start,
+           breakdown_value)
+  GROUP BY day_start,
+           breakdown_value
+  ORDER BY day_start ASC, breakdown_value ASC)
+  GROUP BY breakdown_value
+  ORDER BY sum(count) DESC, breakdown_value ASC
   LIMIT 10000 SETTINGS readonly=2,
                        max_execution_time=60,
                        allow_experimental_object_type=1
@@ -448,7 +462,7 @@
 # ---
 # name: TestTrends.test_breakdown_weekly_active_users_aggregated
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')) AS value,
          count(DISTINCT e__pdi.person_id) AS count
   FROM events AS e
   INNER JOIN
@@ -480,7 +494,7 @@
   CROSS JOIN
   (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
           e__pdi.person_id AS actor_id,
-          transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['val', 'bor'], ['val', 'bor'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+          transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['val', 'bor'], ['val', 'bor'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   INNER JOIN
   (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -489,7 +503,7 @@
   WHERE equals(person_distinct_id2.team_id, 2)
   GROUP BY person_distinct_id2.distinct_id
   HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
-  WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'bor'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0))
+  WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), 'val'), 0), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), 'bor'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0))
   GROUP BY timestamp, actor_id,
            breakdown_value) AS e
   WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0))
@@ -506,7 +520,7 @@
 # ---
 # name: TestTrends.test_breakdown_weekly_active_users_aggregated_materialized
   '''
-  SELECT nullIf(nullIf(e.mat_key, ''), 'null') AS value,
+  SELECT toString(nullIf(nullIf(e.mat_key, ''), 'null')) AS value,
          count(DISTINCT e__pdi.person_id) AS count
   FROM events AS e
   INNER JOIN
@@ -538,7 +552,7 @@
   CROSS JOIN
   (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
          e__pdi.person_id AS actor_id,
-         transform(ifNull(nullIf(nullIf(e.mat_key, ''), 'null'), '$$_posthog_breakdown_null_$$'), ['val', 'bor'], ['val', 'bor'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+         transform(ifNull(toString(nullIf(nullIf(e.mat_key, ''), 'null')), '$$_posthog_breakdown_null_$$'), ['val', 'bor'], ['val', 'bor'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   INNER JOIN
   (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -547,7 +561,7 @@
   WHERE equals(person_distinct_id2.team_id, 2)
   GROUP BY person_distinct_id2.distinct_id
   HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
-  WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(ifNull(equals(nullIf(nullIf(e.mat_key, ''), 'null'), 'val'), 0), ifNull(equals(nullIf(nullIf(e.mat_key, ''), 'null'), 'bor'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0))
+  WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(ifNull(equals(toString(nullIf(nullIf(e.mat_key, ''), 'null')), 'val'), 0), ifNull(equals(toString(nullIf(nullIf(e.mat_key, ''), 'null')), 'bor'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0))
   GROUP BY timestamp, actor_id,
            breakdown_value) AS e
   WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0))
@@ -584,7 +598,7 @@
 # ---
 # name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.2
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')) AS value,
          count(DISTINCT e__pdi.person_id) AS count
   FROM events AS e
   INNER JOIN
@@ -622,7 +636,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
           day_start AS day_start,
@@ -654,7 +668,7 @@
   CROSS JOIN
   (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
           e__pdi.person_id AS actor_id,
-          transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['val'], ['val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+          transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['val'], ['val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   INNER JOIN
   (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -679,7 +693,7 @@
   FROM cohortpeople
   WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2))
   GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version
-  HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0))
+  HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), 'val'), 0)), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0))
   GROUP BY timestamp, actor_id,
            breakdown_value) AS e
   WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0))
@@ -699,7 +713,7 @@
 # ---
 # name: TestTrends.test_breakdown_with_filter_groups_person_on_events
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')) AS value,
          count(e.uuid) AS count
   FROM events AS e
   LEFT JOIN
@@ -722,7 +736,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
           day_start AS day_start,
@@ -743,7 +757,7 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(e.uuid) AS total,
                    toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
-                   transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['uh', 'oh'], ['uh', 'oh'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['uh', 'oh'], ['uh', 'oh'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   LEFT JOIN
   (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
@@ -753,7 +767,7 @@
   WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
   GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'uh'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'oh'), 0)))
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), or(ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), 'uh'), 0), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), 'oh'), 0)))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -782,7 +796,7 @@
 # ---
 # name: TestTrends.test_breakdown_with_filter_groups_person_on_events_v2.1
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')) AS value,
          count(e.uuid) AS count
   FROM events AS e
   LEFT JOIN
@@ -805,7 +819,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
           day_start AS day_start,
@@ -826,7 +840,7 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(DISTINCT ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id)) AS total,
                    toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
-                   transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['uh', 'oh'], ['uh', 'oh'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['uh', 'oh'], ['uh', 'oh'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   LEFT OUTER JOIN
   (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id,
@@ -842,7 +856,7 @@
   WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
   GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'uh'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'oh'), 0)))
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), or(ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), 'uh'), 0), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), 'oh'), 0)))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -857,7 +871,7 @@
 # ---
 # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value,
          count(e.uuid) AS count
   FROM events AS e
   WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
@@ -872,7 +886,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
           day_start AS day_start,
@@ -893,7 +907,7 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total,
                    toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
-                   transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['other_value', '$$_posthog_breakdown_null_$$', 'value'], ['other_value', '$$_posthog_breakdown_null_$$', 'value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['other_value', '$$_posthog_breakdown_null_$$', 'value'], ['other_value', '$$_posthog_breakdown_null_$$', 'value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1.0
   INNER JOIN
   (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -902,7 +916,7 @@
   WHERE equals(person_distinct_id2.team_id, 2)
   GROUP BY person_distinct_id2.distinct_id
   HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0)))
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'other_value'), 0), isNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''))), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'value'), 0)))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -917,7 +931,7 @@
 # ---
 # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.2
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value,
          count(e.uuid) AS count
   FROM events AS e
   WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
@@ -932,7 +946,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
           day_start AS day_start,
@@ -953,7 +967,7 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total,
                    toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
-                   transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['other_value', '$$_posthog_breakdown_null_$$', 'value'], ['other_value', '$$_posthog_breakdown_null_$$', 'value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['other_value', '$$_posthog_breakdown_null_$$', 'value'], ['other_value', '$$_posthog_breakdown_null_$$', 'value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1.0
   INNER JOIN
   (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -962,7 +976,7 @@
   WHERE equals(person_distinct_id2.team_id, 2)
   GROUP BY person_distinct_id2.distinct_id
   HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0)))
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'other_value'), 0), isNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''))), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'value'), 0)))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -1242,7 +1256,7 @@
 # ---
 # name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter
   '''
-  SELECT e__pdi__person.`properties___$some_prop` AS value,
+  SELECT toString(e__pdi__person.`properties___$some_prop`) AS value,
          count(DISTINCT e__pdi.person_id) AS count
   FROM events AS e
   INNER JOIN
@@ -1276,7 +1290,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
           day_start AS day_start,
@@ -1308,7 +1322,7 @@
   CROSS JOIN
   (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
           e__pdi.person_id AS actor_id,
-          transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['some_val2', 'some_val'], ['some_val2', 'some_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+          transform(ifNull(toString(e__pdi__person.`properties___$some_prop`), '$$_posthog_breakdown_null_$$'), ['some_val2', 'some_val'], ['some_val2', 'some_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   INNER JOIN
   (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -1329,7 +1343,7 @@
   WHERE equals(person.team_id, 2)
   GROUP BY person.id
   HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
-  WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(e__pdi__person.properties___filter_prop, 'filter_val'), 0), or(ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val2'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0))
+  WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(e__pdi__person.properties___filter_prop, 'filter_val'), 0), or(ifNull(equals(toString(e__pdi__person.`properties___$some_prop`), 'some_val2'), 0), ifNull(equals(toString(e__pdi__person.`properties___$some_prop`), 'some_val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0))
   GROUP BY timestamp, actor_id,
            breakdown_value) AS e
   WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(29))), 0))
@@ -1349,7 +1363,7 @@
 # ---
 # name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter_poe_v2
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', '')) AS value,
          count(DISTINCT ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id)) AS count
   FROM events AS e
   LEFT OUTER JOIN
@@ -1370,7 +1384,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
           day_start AS day_start,
@@ -1402,7 +1416,7 @@
   CROSS JOIN
   (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
           ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id) AS actor_id,
-          transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['some_val2', 'some_val'], ['some_val2', 'some_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+          transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['some_val2', 'some_val'], ['some_val2', 'some_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   LEFT OUTER JOIN
   (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id,
@@ -1410,7 +1424,7 @@
   FROM person_overrides
   WHERE equals(person_overrides.team_id, 2)
   GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id)
-  WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'filter_prop'), ''), 'null'), '^"|"$', ''), 'filter_val'), 0), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', ''), 'some_val2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', ''), 'some_val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0))
+  WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'filter_prop'), ''), 'null'), '^"|"$', ''), 'filter_val'), 0), or(ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', '')), 'some_val2'), 0), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', '')), 'some_val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0))
   GROUP BY timestamp, actor_id,
            breakdown_value) AS e
   WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(29))), 0))
@@ -1476,7 +1490,7 @@
 # ---
 # name: TestTrends.test_person_filtering_in_cohort_in_action.2
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value,
          count(e.uuid) AS count
   FROM events AS e
   INNER JOIN
@@ -1503,7 +1517,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
           day_start AS day_start,
@@ -1524,7 +1538,7 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(e.uuid) AS total,
                    toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
-                   transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   INNER JOIN
   (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -1538,7 +1552,7 @@
   FROM cohortpeople
   WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2))
   GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version
-  HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0)))
+  HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(isNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''))), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'value'), 0), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'other_value'), 0)))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -1573,7 +1587,7 @@
 # ---
 # name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.2
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value,
          count(e.uuid) AS count
   FROM events AS e
   LEFT OUTER JOIN
@@ -1599,7 +1613,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
           day_start AS day_start,
@@ -1620,7 +1634,7 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(e.uuid) AS total,
                    toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
-                   transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   LEFT OUTER JOIN
   (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id,
@@ -1633,7 +1647,7 @@
   FROM cohortpeople
   WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2))
   GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version
-  HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0)))
+  HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(isNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''))), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'value'), 0), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'other_value'), 0)))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -2217,7 +2231,7 @@
 # ---
 # name: TestTrends.test_timezones_daily.4
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')) AS value,
          count(e.uuid) AS count
   FROM events AS e
   WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
@@ -2232,7 +2246,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
          day_start AS day_start,
@@ -2253,7 +2267,7 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total,
                    toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
-                   transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['Mac'], ['Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['Mac'], ['Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   INNER JOIN
   (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -2262,7 +2276,7 @@
   WHERE equals(person_distinct_id2.team_id, 2)
   GROUP BY person_distinct_id2.distinct_id
   HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), 'Mac'), 0))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -2408,7 +2422,7 @@
 # ---
 # name: TestTrends.test_timezones_daily_minus_utc.4
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')) AS value,
          count(e.uuid) AS count
   FROM events AS e
   WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')))
@@ -2423,7 +2437,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
          day_start AS day_start,
@@ -2444,7 +2458,7 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total,
                    toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start,
-                   transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['Mac'], ['Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['Mac'], ['Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   INNER JOIN
   (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -2453,7 +2467,7 @@
   WHERE equals(person_distinct_id2.team_id, 2)
   GROUP BY person_distinct_id2.distinct_id
   HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), 'Mac'), 0))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -2599,7 +2613,7 @@
 # ---
 # name: TestTrends.test_timezones_daily_plus_utc.4
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')) AS value,
          count(e.uuid) AS count
   FROM events AS e
   WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')))
@@ -2614,7 +2628,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
          day_start AS day_start,
@@ -2635,7 +2649,7 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total,
                    toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start,
-                   transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['Mac'], ['Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['Mac'], ['Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   INNER JOIN
   (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -2644,7 +2658,7 @@
   WHERE equals(person_distinct_id2.team_id, 2)
   GROUP BY person_distinct_id2.distinct_id
   HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), 'Mac'), 0))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -2992,7 +3006,7 @@
 # ---
 # name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns
   '''
-  SELECT e__pdi__person.properties___email AS value,
+  SELECT toString(e__pdi__person.properties___email) AS value,
          count(e.uuid) AS count
   FROM events AS e
   INNER JOIN
@@ -3027,7 +3041,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
          day_start AS day_start,
@@ -3048,7 +3062,7 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(e.uuid) AS total,
                    toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
-                   transform(ifNull(e__pdi__person.properties___email, '$$_posthog_breakdown_null_$$'), ['test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], ['test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(e__pdi__person.properties___email), '$$_posthog_breakdown_null_$$'), ['test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], ['test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   INNER JOIN
   (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
@@ -3070,7 +3084,7 @@
   WHERE equals(person.team_id, 2)
   GROUP BY person.id
   HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), 1), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0))), or(ifNull(equals(e__pdi__person.properties___email, 'test2@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test@gmail.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test5@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test4@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test3@posthog.com'), 0)))
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), 1), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0))), or(ifNull(equals(toString(e__pdi__person.properties___email), 'test2@posthog.com'), 0), ifNull(equals(toString(e__pdi__person.properties___email), 'test@gmail.com'), 0), ifNull(equals(toString(e__pdi__person.properties___email), 'test5@posthog.com'), 0), ifNull(equals(toString(e__pdi__person.properties___email), 'test4@posthog.com'), 0), ifNull(equals(toString(e__pdi__person.properties___email), 'test3@posthog.com'), 0)))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -3085,7 +3099,7 @@
 # ---
 # name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.2
   '''
-  SELECT e__pdi__person.properties___email AS value,
+  SELECT toString(e__pdi__person.properties___email) AS value,
          count(e.uuid) AS count
   FROM events AS e
   INNER JOIN
@@ -3120,7 +3134,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
          day_start AS day_start,
@@ -3141,7 +3155,7 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(e.uuid) AS total,
                    toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
-                   transform(ifNull(e__pdi__person.properties___email, '$$_posthog_breakdown_null_$$'), ['test2@posthog.com'], ['test2@posthog.com'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(e__pdi__person.properties___email), '$$_posthog_breakdown_null_$$'), ['test2@posthog.com'], ['test2@posthog.com'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   INNER JOIN
   (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
@@ -3163,7 +3177,7 @@
   WHERE equals(person.team_id, 2)
   GROUP BY person.id
   HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), 0)), ifNull(equals(e__pdi__person.properties___email, 'test2@posthog.com'), 0))
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), 0)), ifNull(equals(toString(e__pdi__person.properties___email), 'test2@posthog.com'), 0))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -3248,7 +3262,7 @@
 # ---
 # name: TestTrends.test_trends_aggregate_by_distinct_id.2
   '''
-  SELECT e__pdi__person.`properties___$some_prop` AS value,
+  SELECT toString(e__pdi__person.`properties___$some_prop`) AS value,
          count(e.uuid) AS count
   FROM events AS e
   INNER JOIN
@@ -3281,7 +3295,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
          day_start AS day_start,
@@ -3302,7 +3316,7 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(DISTINCT e.distinct_id) AS total,
                    toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
-                   transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['some_val', '$$_posthog_breakdown_null_$$'], ['some_val', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(e__pdi__person.`properties___$some_prop`), '$$_posthog_breakdown_null_$$'), ['some_val', '$$_posthog_breakdown_null_$$'], ['some_val', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   INNER JOIN
   (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
@@ -3322,7 +3336,7 @@
   WHERE equals(person.team_id, 2)
   GROUP BY person.id
   HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0), isNull(e__pdi__person.`properties___$some_prop`)))
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(toString(e__pdi__person.`properties___$some_prop`), 'some_val'), 0), isNull(toString(e__pdi__person.`properties___$some_prop`))))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -3415,7 +3429,7 @@
 # ---
 # name: TestTrends.test_trends_aggregate_by_distinct_id.6
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', '')) AS value,
          count(e.uuid) AS count
   FROM events AS e
   WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
@@ -3430,7 +3444,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT sum(total) AS count,
          day_start AS day_start,
@@ -3451,9 +3465,9 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(DISTINCT e.distinct_id) AS total,
                    toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
-                   transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$'], ['$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$'], ['$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', '')))
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), isNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', ''))))
   GROUP BY day_start,
            breakdown_value)
   GROUP BY day_start,
@@ -3520,7 +3534,7 @@
 # ---
 # name: TestTrends.test_trends_breakdown_cumulative
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value,
          count(e.uuid) AS count
   FROM events AS e
   WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
@@ -3535,7 +3549,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
   FROM
   (SELECT day_start AS day_start,
          sum(count) OVER (PARTITION BY breakdown_value
@@ -3561,7 +3575,7 @@
   ORDER BY sec.breakdown_value ASC, day_start ASC
   UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total,
                    min(toStartOfDay(toTimeZone(e.timestamp, 'UTC'))) AS day_start,
-                   transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+                   transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
   FROM events AS e SAMPLE 1
   INNER JOIN
   (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -3570,7 +3584,7 @@
   WHERE equals(person_distinct_id2.team_id, 2)
   GROUP BY person_distinct_id2.distinct_id
   HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
-  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0)))
+  WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''))), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'value'), 0), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'other_value'), 0)))
   GROUP BY e__pdi.person_id,
            breakdown_value)
   GROUP BY day_start,
@@ -3585,7 +3599,7 @@
 # ---
 # name: TestTrends.test_trends_breakdown_cumulative_poe_v2
   '''
-  SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+  SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value,
          count(e.uuid) AS count
   FROM events AS e
   WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
@@ -3600,7 +3614,7 @@
   '''
   SELECT groupArray(day_start) AS date,
          groupArray(count) AS total,
-         ifNull(toString(breakdown_value), '') AS breakdown_value
+         ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS
breakdown_value FROM (SELECT day_start AS day_start, sum(count) OVER (PARTITION BY breakdown_value @@ -3626,7 +3640,7 @@ ORDER BY sec.breakdown_value ASC, day_start ASC UNION ALL SELECT count(DISTINCT ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id)) AS total, min(toStartOfDay(toTimeZone(e.timestamp, 'UTC'))) AS day_start, - transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value + transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value FROM events AS e SAMPLE 1 LEFT OUTER JOIN (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id, @@ -3634,7 +3648,7 @@ FROM person_overrides WHERE equals(person_overrides.team_id, 2) GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''))), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'value'), 0), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'other_value'), 0))) GROUP BY ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), breakdown_value) GROUP BY day_start, @@ -3649,7 +3663,7 @@ # --- # name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown ''' - SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value, max(e__session.duration) AS count FROM events AS e INNER JOIN @@ -3672,7 +3686,7 @@ breakdown_value AS breakdown_value FROM (SELECT 
any(e__session.duration) AS session_duration, - transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['value2', 'value1', '$$_posthog_breakdown_null_$$'], ['value2', 'value1', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value + transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['value2', 'value1', '$$_posthog_breakdown_null_$$'], ['value2', 'value1', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN (SELECT events.`$session_id` AS id, @@ -3680,7 +3694,7 @@ FROM events WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1)) GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')))) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'value2'), 0), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'value1'), 0), isNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''))))) GROUP BY e__session.id, breakdown_value) GROUP BY breakdown_value @@ -3691,7 +3705,7 @@ # --- # name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown.2 ''' - SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value, max(e__session.duration) AS count FROM events AS e INNER JOIN @@ -3714,7 +3728,7 @@ breakdown_value AS breakdown_value FROM (SELECT any(e__session.duration) AS session_duration, - transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['value2', 'value1', 
'$$_posthog_breakdown_null_$$'], ['value2', 'value1', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value + transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['value2', 'value1', '$$_posthog_breakdown_null_$$'], ['value2', 'value1', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN (SELECT events.`$session_id` AS id, @@ -3722,7 +3736,7 @@ FROM events WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1)) GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')))) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'value2'), 0), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'value1'), 0), isNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''))))) GROUP BY e__session.id, breakdown_value) GROUP BY breakdown_value @@ -3854,7 +3868,7 @@ # --- # name: TestTrends.test_trends_count_per_user_average_aggregated_with_event_property_breakdown_with_sampling ''' - SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '') AS value, + SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '')) AS value, count(e.uuid) AS count FROM events AS e WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))), equals(e.event, 'viewed video')) @@ -3874,7 +3888,7 @@ breakdown_value AS breakdown_value FROM (SELECT count(e.uuid) AS total, - transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['red', 
'blue', '$$_posthog_breakdown_null_$$'], ['red', 'blue', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value + transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['red', 'blue', '$$_posthog_breakdown_null_$$'], ['red', 'blue', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value FROM events AS e SAMPLE 1.0 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -3883,7 +3897,7 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(equals(e.event, 'viewed video'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''), 'red'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''), 'blue'), 0), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '')))), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) + WHERE and(equals(e.team_id, 2), and(equals(e.event, 'viewed video'), or(ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '')), 'red'), 0), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '')), 'blue'), 0), isNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''))))), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) GROUP BY e__pdi.person_id, breakdown_value) GROUP BY breakdown_value) @@ -4098,7 +4112,7 @@ # --- # name: TestTrends.test_trends_person_breakdown_with_session_property_single_aggregate_math_and_breakdown ''' - SELECT e__pdi__person.`properties___$some_prop` AS value, + SELECT toString(e__pdi__person.`properties___$some_prop`) AS value, max(e__session.duration) AS count FROM events AS e INNER JOIN @@ -4139,7 +4153,7 @@ breakdown_value AS breakdown_value FROM (SELECT any(e__session.duration) AS session_duration, - transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['some_val', 'another_val'], ['some_val', 'another_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value + transform(ifNull(toString(e__pdi__person.`properties___$some_prop`), '$$_posthog_breakdown_null_$$'), ['some_val', 'another_val'], ['some_val', 'another_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN (SELECT events.`$session_id` AS id, @@ -4165,7 +4179,7 @@ WHERE equals(person.team_id, 2) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, 
e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'another_val'), 0))) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(toString(e__pdi__person.`properties___$some_prop`), 'some_val'), 0), ifNull(equals(toString(e__pdi__person.`properties___$some_prop`), 'another_val'), 0))) GROUP BY e__session.id, breakdown_value) GROUP BY breakdown_value @@ -4316,7 +4330,7 @@ # --- # name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns ''' - SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value, max(e__session.duration) AS count FROM events AS e INNER JOIN @@ -4337,7 +4351,7 @@ ''' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - ifNull(toString(breakdown_value), '') AS breakdown_value + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value FROM (SELECT sum(total) AS count, day_start AS day_start, @@ -4361,7 +4375,7 @@ breakdown_value AS breakdown_value FROM (SELECT any(e__session.duration) AS session_duration, - transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['value2', 'value1'], ['value2', 'value1'], '$$_posthog_breakdown_other_$$') AS breakdown_value, + transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['value2', 'value1'], ['value2', 'value1'], '$$_posthog_breakdown_other_$$') AS breakdown_value, toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 0) AS day_start FROM events AS e SAMPLE 1 INNER JOIN @@ -4370,7 +4384,7 @@ FROM events WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1)) GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), 
ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0))) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'value2'), 0), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'value1'), 0))) GROUP BY day_start, e__session.id, breakdown_value, @@ -4389,7 +4403,7 @@ # --- # name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns.2 ''' - SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value, + SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value, max(e__session.duration) AS count FROM events AS e INNER JOIN @@ -4410,7 +4424,7 @@ ''' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - ifNull(toString(breakdown_value), '') AS breakdown_value + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value FROM (SELECT sum(total) AS count, day_start AS day_start, @@ -4434,7 +4448,7 @@ breakdown_value AS breakdown_value FROM (SELECT any(e__session.duration) AS session_duration, - transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['value2', 'value1'], ['value2', 'value1'], '$$_posthog_breakdown_other_$$') AS breakdown_value, + transform(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), '$$_posthog_breakdown_null_$$'), ['value2', 'value1'], ['value2', 'value1'], '$$_posthog_breakdown_other_$$') AS breakdown_value, toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN @@ -4443,7 +4457,7 @@ FROM events WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1)) GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0))) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 
lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'value2'), 0), ifNull(equals(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), 'value1'), 0))) GROUP BY day_start, e__session.id, breakdown_value, diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr index db9e8e1d45000..1e3bc1b5cbad6 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr @@ -1,7 +1,7 @@ # serializer version: 1 # name: TestTrendsDataWarehouseQuery.test_trends_breakdown ''' - SELECT e.prop_1 AS value, + SELECT toString(e.prop_1) AS value, count(e.id) AS count FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e WHERE and(and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)), and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0))) @@ -16,7 +16,7 @@ ''' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - ifNull(toString(breakdown_value), '') AS breakdown_value + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value FROM (SELECT sum(total) AS count, day_start AS day_start, @@ -37,9 +37,9 @@ ORDER BY sec.breakdown_value ASC, day_start ASC UNION ALL SELECT count(e.id) AS total, toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start, - transform(ifNull(e.prop_1, '$$_posthog_breakdown_null_$$'), ['d', 'c', 'b', 'a'], ['d', 'c', 'b', 'a'], '$$_posthog_breakdown_other_$$') AS breakdown_value + transform(ifNull(toString(e.prop_1), '$$_posthog_breakdown_null_$$'), ['d', 'c', 'b', 'a'], ['d', 'c', 'b', 'a'], '$$_posthog_breakdown_other_$$') AS breakdown_value FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e - WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), or(equals(e.prop_1, 'd'), equals(e.prop_1, 'c'), equals(e.prop_1, 'b'), equals(e.prop_1, 
'a'))) + WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), or(ifNull(equals(toString(e.prop_1), 'd'), 0), ifNull(equals(toString(e.prop_1), 'c'), 0), ifNull(equals(toString(e.prop_1), 'b'), 0), ifNull(equals(toString(e.prop_1), 'a'), 0))) GROUP BY day_start, breakdown_value) GROUP BY day_start, @@ -54,7 +54,7 @@ # --- # name: TestTrendsDataWarehouseQuery.test_trends_breakdown_with_property ''' - SELECT e.prop_1 AS value, + SELECT toString(e.prop_1) AS value, count(e.id) AS count FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e WHERE and(and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)), and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'))) @@ -69,7 +69,7 @@ ''' SELECT groupArray(day_start) AS date, groupArray(count) AS total, - ifNull(toString(breakdown_value), '') AS breakdown_value + ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value FROM (SELECT sum(total) AS count, day_start AS day_start, @@ -90,9 +90,9 @@ ORDER BY sec.breakdown_value ASC, day_start ASC UNION ALL SELECT count(e.id) AS total, toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start, - transform(ifNull(e.prop_1, '$$_posthog_breakdown_null_$$'), ['a'], ['a'], '$$_posthog_breakdown_other_$$') AS breakdown_value + transform(ifNull(toString(e.prop_1), '$$_posthog_breakdown_null_$$'), ['a'], ['a'], '$$_posthog_breakdown_other_$$') AS breakdown_value FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e - WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'), equals(e.prop_1, 'a')) + WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'), ifNull(equals(toString(e.prop_1), 'a'), 0)) GROUP BY day_start, breakdown_value) GROUP BY 
day_start, diff --git a/posthog/hogql_queries/insights/trends/test/test_trends.py b/posthog/hogql_queries/insights/trends/test/test_trends.py index 1ac54e16de629..9e885fbadcc1d 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends.py @@ -5180,7 +5180,9 @@ def test_breakdown_filtering_with_properties_in_new_format(self): ) response = sorted(response, key=lambda x: x["label"]) - self.assertEqual(len(response), 0) + self.assertEqual(len(response), 1) + self.assertEqual(response[0]["label"], "$$_posthog_breakdown_null_$$") + self.assertEqual(response[0]["count"], 0) @also_test_with_person_on_events_v2 @snapshot_clickhouse_queries diff --git a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py index 433df7c7df23b..8d14950ec23b2 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py @@ -381,7 +381,7 @@ def test_trends_query_formula_breakdown_no_data(self): TrendsFilter(formula="A+B"), BreakdownFilter(breakdown_type=BreakdownType.person, breakdown="$browser"), ) - self.assertEqual([], response.results) + self.assertEqual([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], response.results[0]["data"]) def test_trends_query_formula_aggregate(self): self._create_test_events() @@ -714,16 +714,7 @@ def test_trends_breakdowns_multiple_hogql(self): breakdown_labels = [result["breakdown_value"] for result in response.results] assert len(response.results) == 8 - assert breakdown_labels == [ - "Chrome", - "Firefox", - "Edge", - "Safari", - "Chrome", - "Edge", - "Firefox", - "Safari", - ] + assert breakdown_labels == ["Chrome", "Firefox", "Edge", "Safari", "Chrome", "Edge", "Firefox", "Safari"] assert response.results[0]["label"] == f"$pageview - Chrome" assert response.results[1]["label"] == f"$pageview - Firefox" assert response.results[2]["label"] == f"$pageview - Edge" @@ -823,6 +814,7 @@ def test_trends_breakdown_and_aggregation_query_orchestration(self): 10, 0, ] + assert response.results[1]["data"] == [ 20, 0, @@ -1606,9 +1598,8 @@ def test_to_actors_query_options_breakdowns_boolean(self): assert response.series == [InsightActorsQuerySeries(label="$pageview", value=0)] assert response.breakdown == [ - # BreakdownItem(label="Other", value="$$_posthog_breakdown_other_$$"), # TODO: Add when "Other" works - BreakdownItem(label="true", value=1), - BreakdownItem(label="false", value=0), + BreakdownItem(label="true", value="true"), + BreakdownItem(label="false", value="false"), ] def test_to_actors_query_options_breakdowns_histogram(self): diff --git a/posthog/hogql_queries/insights/trends/trends_query_builder.py b/posthog/hogql_queries/insights/trends/trends_query_builder.py index 7be735d3b0a8b..a911e4bf8302a 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_builder.py +++ b/posthog/hogql_queries/insights/trends/trends_query_builder.py @@ -14,6 +14,7 @@ from posthog.models.action.action import Action from posthog.models.filters.mixins.utils import cached_property from posthog.models.team.team import Team +from posthog.queries.trends.breakdown import BREAKDOWN_NULL_STRING_LABEL from posthog.schema import ( ActionsNode, DataWarehouseNode, @@ -68,7 +69,7 @@ def build_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: return full_query def build_actors_query( - self, time_frame: Optional[str] = None, breakdown_filter: Optional[str | int] 
= None + self, time_frame: Optional[str] = None, breakdown_filter: Optional[str] = None ) -> ast.SelectQuery | ast.SelectUnionQuery: breakdown = self._breakdown(is_actors_query=True, breakdown_values_override=breakdown_filter) @@ -292,7 +293,8 @@ def _get_events_subquery( # Just breakdowns elif breakdown.enabled: if not is_actors_query: - default_query.select.append(breakdown.column_expr()) + breakdown_expr = breakdown.column_expr() + default_query.select.append(breakdown_expr) default_query.group_by.append(ast.Field(chain=["breakdown_value"])) # Just session duration math property elif self._aggregation_operation.aggregating_on_session_duration(): @@ -369,7 +371,7 @@ def _outer_select_query(self, breakdown: Breakdown, inner_query: ast.SelectQuery name="ifNull", args=[ ast.Call(name="toString", args=[ast.Field(chain=["breakdown_value"])]), - ast.Constant(value=""), + ast.Constant(value=BREAKDOWN_NULL_STRING_LABEL), ], ), ) @@ -565,7 +567,7 @@ def session_duration_math_property_wrapper(self, default_query: ast.SelectQuery) query.group_by = [] return query - def _breakdown(self, is_actors_query: bool, breakdown_values_override: Optional[str | int] = None): + def _breakdown(self, is_actors_query: bool, breakdown_values_override: Optional[str] = None): return Breakdown( team=self.team, query=self.query, diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index d66110298ffab..d61720740f52b 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -23,9 +23,7 @@ from posthog.hogql.query import execute_hogql_query from posthog.hogql.timings import HogQLTimings from posthog.hogql_queries.insights.trends.breakdown_values import ( - BREAKDOWN_NULL_NUMERIC_LABEL, BREAKDOWN_NULL_STRING_LABEL, - BREAKDOWN_OTHER_NUMERIC_LABEL, BREAKDOWN_OTHER_STRING_LABEL, ) from posthog.hogql_queries.insights.trends.display import TrendsDisplay @@ -175,7 +173,7 @@ def to_actors_query( modifiers=self.modifiers, ) - query = query_builder.build_actors_query(time_frame=time_frame, breakdown_filter=breakdown_value) + query = query_builder.build_actors_query(time_frame=time_frame, breakdown_filter=str(breakdown_value)) return query @@ -240,14 +238,10 @@ def to_actors_query_options(self) -> InsightActorsQueryOptionsResponse: cohort_name = "all users" if str(value) == "0" else Cohort.objects.get(pk=value).name label = cohort_name value = value - elif value == BREAKDOWN_OTHER_STRING_LABEL or value == BREAKDOWN_OTHER_NUMERIC_LABEL: - # label = "Other" - # value = BREAKDOWN_OTHER_STRING_LABEL - continue # TODO: Add support for "other" breakdowns - elif value == BREAKDOWN_NULL_STRING_LABEL or value == BREAKDOWN_NULL_NUMERIC_LABEL: - # label = "Null" - # value = BREAKDOWN_NULL_STRING_LABEL - continue # TODO: Add support for "null" breakdowns + elif value == BREAKDOWN_OTHER_STRING_LABEL: + label = "Other (Groups all remaining values)" + elif value == BREAKDOWN_NULL_STRING_LABEL: + label = "None (No value)" elif is_boolean_breakdown: label = self._convert_boolean(value) else: @@ -501,18 +495,6 @@ def get_value(name: str, val: Any): series_object["breakdown_value"] = remapped_label - # If the breakdown value is the numeric "other", then set it to the string version - if ( - remapped_label == BREAKDOWN_OTHER_NUMERIC_LABEL - or remapped_label == str(BREAKDOWN_OTHER_NUMERIC_LABEL) - or remapped_label == float(BREAKDOWN_OTHER_NUMERIC_LABEL) - ): - 
series_object["breakdown_value"] = BREAKDOWN_OTHER_STRING_LABEL - if real_series_count > 1 or self._is_breakdown_field_boolean(): - series_object["label"] = "{} - {}".format(series_label or "All events", "Other") - else: - series_object["label"] = "Other" - res.append(series_object) return res From 6eea1a6554b090925f1117e2adee053bc7a37146 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Thu, 21 Mar 2024 23:15:17 +0000 Subject: [PATCH 06/15] chore: only run depot runner comparison when labelled (#21092) * chore: only run depot runner comparison when labelled * fix --- .github/workflows/ci-backend-depot.yml | 10 +--------- .github/workflows/ci-e2e-depot.yml | 2 +- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci-backend-depot.yml b/.github/workflows/ci-backend-depot.yml index 3cf935ced141e..928886d44cf52 100644 --- a/.github/workflows/ci-backend-depot.yml +++ b/.github/workflows/ci-backend-depot.yml @@ -5,15 +5,7 @@ name: Backend CI (depot) on: - push: - branches: - - master pull_request: - workflow_dispatch: - inputs: - clickhouseServerVersion: - description: ClickHouse server version. Leave blank for default - type: string concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} @@ -41,7 +33,7 @@ jobs: changes: runs-on: depot-ubuntu-latest-4 timeout-minutes: 5 - if: github.repository == 'PostHog/posthog' + if: ${{ contains(github.event.pull_request.labels.*.name, 'test-depot') }} name: Determine need to run backend checks # Set job outputs to values from filter step outputs: diff --git a/.github/workflows/ci-e2e-depot.yml b/.github/workflows/ci-e2e-depot.yml index 4985dac9d746a..697d42e97f945 100644 --- a/.github/workflows/ci-e2e-depot.yml +++ b/.github/workflows/ci-e2e-depot.yml @@ -16,7 +16,7 @@ jobs: changes: runs-on: depot-ubuntu-latest-4 timeout-minutes: 5 - if: github.repository == 'PostHog/posthog' + if: ${{ contains(github.event.pull_request.labels.*.name, 'test-depot') }} name: Determine need to run E2E checks # Set job outputs to values from filter step outputs: From f43d7a9c7a0089e84ce6670c763358b8723172f1 Mon Sep 17 00:00:00 2001 From: Robbie Date: Fri, 22 Mar 2024 08:15:50 +0000 Subject: [PATCH 07/15] feat(web-analytics): HogQL sessions where extractor (#20986) * Add logic to extract a raw_sessions where clause from a query on the sessions table * Add failing test - need to figure out types * Replace sessions with raw_sessions * Use existing Visitor class hierarchy rather than creating a new one * Clear types * Add working test * Formatting * Fix typing * Better fix typing * Fix tests * Add a big comment * Increase comment * Add types to Visitor * Handles aliases better * Add test for joining events and sessions * Fix typing * More robust way of looking up tables * Add a test to make sure that collapsing ands works --- posthog/hogql/ast.py | 12 + posthog/hogql/base.py | 2 +- posthog/hogql/database/__init__.py | 0 posthog/hogql/database/models.py | 7 +- posthog/hogql/database/schema/__init__.py | 0 .../hogql/database/schema/cohort_people.py | 3 +- posthog/hogql/database/schema/groups.py | 3 +- posthog/hogql/database/schema/log_entries.py | 5 +- .../database/schema/person_distinct_ids.py | 3 +- posthog/hogql/database/schema/persons.py | 4 +- posthog/hogql/database/schema/persons_pdi.py | 3 +- .../database/schema/session_replay_events.py | 3 +- posthog/hogql/database/schema/sessions.py | 15 +- .../hogql/database/schema/util/__init__.py | 0 .../util/session_where_clause_extractor.py | 398 ++++++++++++++++++ 
.../test_session_where_clause_extractor.py | 284 +++++++
 posthog/hogql/test/test_bytecode.py | 2 +-
 posthog/hogql/test/test_visitor.py | 2 +-
 posthog/hogql/transforms/lazy_tables.py | 2 +-
 posthog/hogql/visitor.py | 13 +-
 20 files changed, 730 insertions(+), 31 deletions(-)
 create mode 100644 posthog/hogql/database/__init__.py
 create mode 100644 posthog/hogql/database/schema/__init__.py
 create mode 100644 posthog/hogql/database/schema/util/__init__.py
 create mode 100644 posthog/hogql/database/schema/util/session_where_clause_extractor.py
 create mode 100644 posthog/hogql/database/schema/util/test/test_session_where_clause_extractor.py

diff --git a/posthog/hogql/ast.py b/posthog/hogql/ast.py
index a459514f2524f..806226b8f1b9e 100644
--- a/posthog/hogql/ast.py
+++ b/posthog/hogql/ast.py
@@ -46,8 +46,17 @@ def resolve_constant_type(self, context: HogQLContext):
     def resolve_database_field(self, context: HogQLContext):
         if isinstance(self.type, FieldType):
             return self.type.resolve_database_field(context)
+        if isinstance(self.type, PropertyType):
+            return self.type.field_type.resolve_database_field(context)
         raise NotImplementedException("FieldAliasType.resolve_database_field not implemented")

+    def resolve_table_type(self, context: HogQLContext):
+        if isinstance(self.type, FieldType):
+            return self.type.table_type
+        if isinstance(self.type, PropertyType):
+            return self.type.field_type.table_type
+        raise NotImplementedException("FieldAliasType.resolve_table_type not implemented")
+

 @dataclass(kw_only=True)
 class BaseTableType(Type):
@@ -339,6 +348,9 @@ def get_child(self, name: str | int, context: HogQLContext) -> Type:
                 f'Can not access property "{name}" on field "{self.name}" of type: {type(database_field).__name__}'
             )

+    def resolve_table_type(self, context: HogQLContext):
+        return self.table_type
+

 @dataclass(kw_only=True)
 class PropertyType(Type):
diff --git a/posthog/hogql/base.py b/posthog/hogql/base.py
index fbdafffb2d08c..e8a74025b78be 100644
--- a/posthog/hogql/base.py
+++ b/posthog/hogql/base.py
@@ -32,7 +32,7 @@ def accept(self, visitor):
             return visit(self)
         if hasattr(visitor, "visit_unknown"):
             return visitor.visit_unknown(self)
-        raise NotImplementedException(f"Visitor has no method {method_name}")
+        raise NotImplementedException(f"{visitor.__class__.__name__} has no method {method_name}")


 @dataclass(kw_only=True)
diff --git a/posthog/hogql/database/__init__.py b/posthog/hogql/database/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/posthog/hogql/database/models.py b/posthog/hogql/database/models.py
index 95a00595c6472..d2da7868a7f9c 100644
--- a/posthog/hogql/database/models.py
+++ b/posthog/hogql/database/models.py
@@ -3,7 +3,6 @@

 from posthog.hogql.base import Expr
 from posthog.hogql.errors import HogQLException, NotImplementedException
-from posthog.schema import HogQLQueryModifiers

 if TYPE_CHECKING:
     from posthog.hogql.context import HogQLContext
@@ -126,12 +125,14 @@ def resolve_table(self, context: "HogQLContext") -> Table:

 class LazyTable(Table):
     """
-    A table that is replaced with a subquery returned from `lazy_select(requested_fields: Dict[name, chain], modifiers: HogQLQueryModifiers)`
+    A table that is replaced with a subquery returned from `lazy_select(requested_fields: Dict[name, chain], context: HogQLContext, node: SelectQuery)`
     """

     model_config = ConfigDict(extra="forbid")

-    def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers) -> Any:
+    def lazy_select(
+        self, requested_fields:
Dict[str, List[str | int]], context: "HogQLContext", node: "SelectQuery" + ) -> Any: raise NotImplementedException("LazyTable.lazy_select not overridden") diff --git a/posthog/hogql/database/schema/__init__.py b/posthog/hogql/database/schema/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/posthog/hogql/database/schema/cohort_people.py b/posthog/hogql/database/schema/cohort_people.py index 72080419b7355..11723f0194619 100644 --- a/posthog/hogql/database/schema/cohort_people.py +++ b/posthog/hogql/database/schema/cohort_people.py @@ -9,7 +9,6 @@ FieldOrTable, ) from posthog.hogql.database.schema.persons import join_with_persons_table -from posthog.schema import HogQLQueryModifiers COHORT_PEOPLE_FIELDS = { "person_id": StringDatabaseField(name="person_id"), @@ -67,7 +66,7 @@ def to_printed_hogql(self): class CohortPeople(LazyTable): fields: Dict[str, FieldOrTable] = COHORT_PEOPLE_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], context, node): return select_from_cohort_people_table(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/groups.py b/posthog/hogql/database/schema/groups.py index bb237d68e8070..3b9de7f08befc 100644 --- a/posthog/hogql/database/schema/groups.py +++ b/posthog/hogql/database/schema/groups.py @@ -13,7 +13,6 @@ FieldOrTable, ) from posthog.hogql.errors import HogQLException -from posthog.schema import HogQLQueryModifiers GROUPS_TABLE_FIELDS = { "index": IntegerDatabaseField(name="group_type_index"), @@ -83,7 +82,7 @@ def to_printed_hogql(self): class GroupsTable(LazyTable): fields: Dict[str, FieldOrTable] = GROUPS_TABLE_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], context, node): return select_from_groups_table(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/log_entries.py b/posthog/hogql/database/schema/log_entries.py index c14e90e26da50..9f5dc816ac4b0 100644 --- a/posthog/hogql/database/schema/log_entries.py +++ b/posthog/hogql/database/schema/log_entries.py @@ -9,7 +9,6 @@ LazyTable, FieldOrTable, ) -from posthog.schema import HogQLQueryModifiers LOG_ENTRIES_FIELDS: Dict[str, FieldOrTable] = { "team_id": IntegerDatabaseField(name="team_id"), @@ -35,7 +34,7 @@ def to_printed_hogql(self): class ReplayConsoleLogsLogEntriesTable(LazyTable): fields: Dict[str, FieldOrTable] = LOG_ENTRIES_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], context, node): fields: List[ast.Expr] = [ast.Field(chain=["log_entries"] + chain) for name, chain in requested_fields.items()] return ast.SelectQuery( @@ -58,7 +57,7 @@ def to_printed_hogql(self): class BatchExportLogEntriesTable(LazyTable): fields: Dict[str, FieldOrTable] = LOG_ENTRIES_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], context, node): fields: List[ast.Expr] = [ast.Field(chain=["log_entries"] + chain) for name, chain in requested_fields.items()] return ast.SelectQuery( diff --git a/posthog/hogql/database/schema/person_distinct_ids.py 
b/posthog/hogql/database/schema/person_distinct_ids.py index 02144b35fc3d8..3304eccda862e 100644 --- a/posthog/hogql/database/schema/person_distinct_ids.py +++ b/posthog/hogql/database/schema/person_distinct_ids.py @@ -14,7 +14,6 @@ ) from posthog.hogql.database.schema.persons import join_with_persons_table from posthog.hogql.errors import HogQLException -from posthog.schema import HogQLQueryModifiers PERSON_DISTINCT_IDS_FIELDS = { "team_id": IntegerDatabaseField(name="team_id"), @@ -82,7 +81,7 @@ def to_printed_hogql(self): class PersonDistinctIdsTable(LazyTable): fields: Dict[str, FieldOrTable] = PERSON_DISTINCT_IDS_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], context, node): return select_from_person_distinct_ids_table(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/persons.py b/posthog/hogql/database/schema/persons.py index a248da56b7307..c7abdd89e14c6 100644 --- a/posthog/hogql/database/schema/persons.py +++ b/posthog/hogql/database/schema/persons.py @@ -123,8 +123,8 @@ def to_printed_hogql(self): class PersonsTable(LazyTable): fields: Dict[str, FieldOrTable] = PERSONS_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): - return select_from_persons_table(requested_fields, modifiers) + def lazy_select(self, requested_fields: Dict[str, List[str | int]], context, node): + return select_from_persons_table(requested_fields, context.modifiers) def to_printed_clickhouse(self, context): return "person" diff --git a/posthog/hogql/database/schema/persons_pdi.py b/posthog/hogql/database/schema/persons_pdi.py index 9f476f407b4d2..195643b90c08c 100644 --- a/posthog/hogql/database/schema/persons_pdi.py +++ b/posthog/hogql/database/schema/persons_pdi.py @@ -10,7 +10,6 @@ FieldOrTable, ) from posthog.hogql.errors import HogQLException -from posthog.schema import HogQLQueryModifiers # :NOTE: We already have person_distinct_ids.py, which most tables link to. 
This persons_pdi.py is a hack to @@ -63,7 +62,7 @@ class PersonsPDITable(LazyTable): "person_id": StringDatabaseField(name="person_id"), } - def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], context, node): return persons_pdi_select(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/session_replay_events.py b/posthog/hogql/database/schema/session_replay_events.py index c9d564c7d4588..baaecef89e049 100644 --- a/posthog/hogql/database/schema/session_replay_events.py +++ b/posthog/hogql/database/schema/session_replay_events.py @@ -15,7 +15,6 @@ PersonDistinctIdsTable, join_with_person_distinct_ids_table, ) -from posthog.schema import HogQLQueryModifiers RAW_ONLY_FIELDS = ["min_first_timestamp", "max_last_timestamp"] @@ -115,7 +114,7 @@ class SessionReplayEventsTable(LazyTable): "first_url": StringDatabaseField(name="first_url"), } - def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], context, node): return select_from_session_replay_events_table(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/sessions.py b/posthog/hogql/database/schema/sessions.py index 2a4865798eeb8..770daceaa23c5 100644 --- a/posthog/hogql/database/schema/sessions.py +++ b/posthog/hogql/database/schema/sessions.py @@ -1,5 +1,7 @@ from typing import Dict, List, cast +from posthog.hogql import ast +from posthog.hogql.context import HogQLContext from posthog.hogql.database.models import ( StringDatabaseField, DateTimeDatabaseField, @@ -11,7 +13,7 @@ LazyTable, ) from posthog.hogql.database.schema.channel_type import create_channel_type_expr -from posthog.schema import HogQLQueryModifiers +from posthog.hogql.database.schema.util.session_where_clause_extractor import SessionMinTimestampWhereClauseExtractor SESSIONS_COMMON_FIELDS: Dict[str, FieldOrTable] = { @@ -62,7 +64,9 @@ def avoid_asterisk_fields(self) -> List[str]: ] -def select_from_sessions_table(requested_fields: Dict[str, List[str | int]]): +def select_from_sessions_table( + requested_fields: Dict[str, List[str | int]], node: ast.SelectQuery, context: HogQLContext +): from posthog.hogql import ast table_name = "raw_sessions" @@ -134,10 +138,13 @@ def select_from_sessions_table(requested_fields: Dict[str, List[str | int]]): ) group_by_fields.append(ast.Field(chain=cast(list[str | int], [table_name]) + chain)) + where = SessionMinTimestampWhereClauseExtractor(context).get_inner_where(node) + return ast.SelectQuery( select=select_fields, select_from=ast.JoinExpr(table=ast.Field(chain=[table_name])), group_by=group_by_fields, + where=where, ) @@ -148,8 +155,8 @@ class SessionsTable(LazyTable): "channel_type": StringDatabaseField(name="channel_type"), } - def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): - return select_from_sessions_table(requested_fields) + def lazy_select(self, requested_fields: Dict[str, List[str | int]], context, node: ast.SelectQuery): + return select_from_sessions_table(requested_fields, node, context) def to_printed_clickhouse(self, context): return "sessions" diff --git a/posthog/hogql/database/schema/util/__init__.py b/posthog/hogql/database/schema/util/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/posthog/hogql/database/schema/util/session_where_clause_extractor.py b/posthog/hogql/database/schema/util/session_where_clause_extractor.py
new file mode 100644
index 0000000000000..83933bdde8b85
--- /dev/null
+++ b/posthog/hogql/database/schema/util/session_where_clause_extractor.py
@@ -0,0 +1,398 @@
+from dataclasses import dataclass
+from typing import Optional
+
+from posthog.hogql import ast
+from posthog.hogql.ast import CompareOperationOp, ArithmeticOperationOp
+from posthog.hogql.context import HogQLContext
+from posthog.hogql.database.models import DatabaseField
+
+from posthog.hogql.visitor import clone_expr, CloningVisitor, Visitor
+
+SESSION_BUFFER_DAYS = 3
+
+
+@dataclass
+class SessionMinTimestampWhereClauseExtractor(CloningVisitor):
+    """This class extracts the WHERE clause from a query against the lazy sessions table, so that it can be pushed
+    down to the underlying ClickHouse sessions table.
+
+    The sessions table in ClickHouse is an AggregatingMergeTree, and will have one row per session per day. This
+    means that when we want to query sessions, we need to pre-group these rows, so that we only have one row per
+    session.
+
+    We hide this detail using a lazy table, but to make querying the underlying ClickHouse table faster, we can
+    inline the min_timestamp WHERE conditions from the select on the outer lazy table into the select on the inner
+    real table.
+
+    This class is called on the select query of the lazy table, and will return the where clause that should be
+    applied to the inner table.
+
+    As a query can be unreasonably complex, we only handle simple cases, but this class is designed to fail safe. If
+    it can't reason about a particular expression, it will just return a constant True, i.e. fetch more rows than
+    necessary.
+
+    This means that we can incrementally add support for more complex queries, without breaking existing queries, by
+    handling more cases.
+
+    Some examples of failing safe:
+
+    `SELECT * FROM sessions WHERE min_timestamp > '2022-01-01' AND f(session_id)`
+    only the `min_timestamp > '2022-01-01'` part is relevant, so we can ignore the `f(session_id)` part, and it is
+    safe to replace it with a constant True, which collapses the AND to just the `min_timestamp > '2022-01-01'` part.
+
+    `SELECT * FROM sessions WHERE min_timestamp > '2022-01-01' OR f(session_id)`
+    only the `min_timestamp > '2022-01-01'` part is relevant, and turning the `f(session_id)` part into a constant
+    True would collapse the OR to True. In this case we return None, as no pre-filtering is possible.
+
+    All min_timestamp comparisons are given a buffer of SESSION_BUFFER_DAYS on each side, to ensure that we collect
+    all the relevant rows for each session.
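+
+    For example, with SESSION_BUFFER_DAYS = 3, an equality comparison is rewritten into a buffered range (a sketch
+    of the transformation; this mirrors test_handles_select_with_eq in the tests below):
+
+    `SELECT * FROM sessions WHERE min_timestamp = '2021-01-01'`
+    yields the inner where clause
+    `(raw_sessions.min_timestamp - toIntervalDay(3)) <= '2021-01-01'
+     AND (raw_sessions.min_timestamp + toIntervalDay(3)) >= '2021-01-01'`
+
+    A minimal usage sketch, assuming a HogQLContext built as in the tests:
+
+        extractor = SessionMinTimestampWhereClauseExtractor(context)
+        inner_where = extractor.get_inner_where(parse_select("SELECT * FROM sessions WHERE min_timestamp > '2021-01-01'"))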
+    """
+
+    context: HogQLContext
+    clear_types: bool = False
+    clear_locations: bool = False
+
+    def get_inner_where(self, parsed_query: ast.SelectQuery) -> Optional[ast.Expr]:
+        if not parsed_query.where:
+            return None
+
+        # visit the where clause
+        where = self.visit(parsed_query.where)
+
+        if isinstance(where, ast.Constant):
+            return None
+
+        return clone_expr(where, clear_types=True, clear_locations=True)
+
+    def visit_compare_operation(self, node: ast.CompareOperation) -> ast.Expr:
+        is_left_constant = is_time_or_interval_constant(node.left)
+        is_right_constant = is_time_or_interval_constant(node.right)
+        is_left_timestamp_field = is_simple_timestamp_field_expression(node.left, self.context)
+        is_right_timestamp_field = is_simple_timestamp_field_expression(node.right, self.context)
+
+        if is_left_constant and is_right_constant:
+            # just ignore this comparison
+            return ast.Constant(value=True)
+
+        # handle the left side being a min_timestamp expression and the right being constant
+        if is_left_timestamp_field and is_right_constant:
+            if node.op == CompareOperationOp.Eq:
+                return ast.And(
+                    exprs=[
+                        ast.CompareOperation(
+                            op=ast.CompareOperationOp.LtEq,
+                            left=ast.ArithmeticOperation(
+                                op=ast.ArithmeticOperationOp.Sub,
+                                left=rewrite_timestamp_field(node.left, self.context),
+                                right=ast.Call(name="toIntervalDay", args=[ast.Constant(value=SESSION_BUFFER_DAYS)]),
+                            ),
+                            right=node.right,
+                        ),
+                        ast.CompareOperation(
+                            op=ast.CompareOperationOp.GtEq,
+                            left=ast.ArithmeticOperation(
+                                op=ast.ArithmeticOperationOp.Add,
+                                left=rewrite_timestamp_field(node.left, self.context),
+                                right=ast.Call(name="toIntervalDay", args=[ast.Constant(value=SESSION_BUFFER_DAYS)]),
+                            ),
+                            right=node.right,
+                        ),
+                    ]
+                )
+            elif node.op == CompareOperationOp.Gt or node.op == CompareOperationOp.GtEq:
+                return ast.CompareOperation(
+                    op=ast.CompareOperationOp.GtEq,
+                    left=ast.ArithmeticOperation(
+                        op=ast.ArithmeticOperationOp.Add,
+                        left=rewrite_timestamp_field(node.left, self.context),
+                        right=ast.Call(name="toIntervalDay", args=[ast.Constant(value=SESSION_BUFFER_DAYS)]),
+                    ),
+                    right=node.right,
+                )
+            elif node.op == CompareOperationOp.Lt or node.op == CompareOperationOp.LtEq:
+                return ast.CompareOperation(
+                    op=ast.CompareOperationOp.LtEq,
+                    left=ast.ArithmeticOperation(
+                        op=ast.ArithmeticOperationOp.Sub,
+                        left=rewrite_timestamp_field(node.left, self.context),
+                        right=ast.Call(name="toIntervalDay", args=[ast.Constant(value=SESSION_BUFFER_DAYS)]),
+                    ),
+                    right=node.right,
+                )
+        elif is_right_timestamp_field and is_left_constant:
+            # let's not duplicate the logic above; instead, just flip the comparison and recurse
+            # (Eq stays Eq; Gt<->Lt and GtEq<->LtEq are swapped, since the operands switch sides)
+            if node.op in [
+                CompareOperationOp.Eq,
+                CompareOperationOp.Lt,
+                CompareOperationOp.LtEq,
+                CompareOperationOp.Gt,
+                CompareOperationOp.GtEq,
+            ]:
+                return self.visit(
+                    ast.CompareOperation(
+                        op=CompareOperationOp.Eq
+                        if node.op == CompareOperationOp.Eq
+                        else CompareOperationOp.Lt
+                        if node.op == CompareOperationOp.Gt
+                        else CompareOperationOp.LtEq
+                        if node.op == CompareOperationOp.GtEq
+                        else CompareOperationOp.Gt
+                        if node.op == CompareOperationOp.Lt
+                        else CompareOperationOp.GtEq,
+                        left=node.right,
+                        right=node.left,
+                    )
+                )
+
+        return ast.Constant(value=True)
+
+    def visit_arithmetic_operation(self, node: ast.ArithmeticOperation) -> ast.Expr:
+        # don't even try to handle complex logic
+        return ast.Constant(value=True)
+
+    def visit_not(self, node: ast.Not) -> ast.Expr:
+        return ast.Constant(value=True)
+
+    def visit_call(self, node: ast.Call) -> ast.Expr:
+        if node.name == "and":
+            return self.visit_and(ast.And(exprs=node.args))
+        elif node.name == "or":
+            return self.visit_or(ast.Or(exprs=node.args))
+        return ast.Constant(value=True)
+
+    def visit_field(self, node: ast.Field) -> ast.Expr:
+        return ast.Constant(value=True)
+
+    def visit_constant(self, node: ast.Constant) -> ast.Expr:
+        return ast.Constant(value=True)
+
+    def visit_placeholder(self, node: ast.Placeholder) -> ast.Expr:
+        raise Exception()  # this should never happen, as placeholders should be resolved before this runs
+
+    def visit_and(self, node: ast.And) -> ast.Expr:
+        exprs = [self.visit(expr) for expr in node.exprs]
+
+        flattened = []
+        for expr in exprs:
+            if isinstance(expr, ast.And):
+                flattened.extend(expr.exprs)
+            else:
+                flattened.append(expr)
+
+        if any(isinstance(expr, ast.Constant) and expr.value is False for expr in flattened):
+            return ast.Constant(value=False)
+
+        filtered = [expr for expr in flattened if not isinstance(expr, ast.Constant) or expr.value is not True]
+        if len(filtered) == 0:
+            return ast.Constant(value=True)
+        elif len(filtered) == 1:
+            return filtered[0]
+        else:
+            return ast.And(exprs=filtered)
+
+    def visit_or(self, node: ast.Or) -> ast.Expr:
+        exprs = [self.visit(expr) for expr in node.exprs]
+
+        flattened = []
+        for expr in exprs:
+            if isinstance(expr, ast.Or):
+                flattened.extend(expr.exprs)
+            else:
+                flattened.append(expr)
+
+        if any(isinstance(expr, ast.Constant) and expr.value is True for expr in flattened):
+            return ast.Constant(value=True)
+
+        filtered = [expr for expr in flattened if not isinstance(expr, ast.Constant) or expr.value is not False]
+        if len(filtered) == 0:
+            return ast.Constant(value=False)
+        elif len(filtered) == 1:
+            return filtered[0]
+        else:
+            return ast.Or(exprs=filtered)
+
+    def visit_alias(self, node: ast.Alias) -> ast.Expr:
+        return self.visit(node.expr)
+
+
+def is_time_or_interval_constant(expr: ast.Expr) -> bool:
+    return IsTimeOrIntervalConstantVisitor().visit(expr)
+
+
+class IsTimeOrIntervalConstantVisitor(Visitor[bool]):
+    def visit_constant(self, node: ast.Constant) -> bool:
+        return True
+
+    def visit_compare_operation(self, node: ast.CompareOperation) -> bool:
+        return self.visit(node.left) and self.visit(node.right)
+
+    def visit_arithmetic_operation(self, node: ast.ArithmeticOperation) -> bool:
+        return self.visit(node.left) and self.visit(node.right)
+
+    def visit_call(self, node: ast.Call) -> bool:
+        # some functions just return a constant
+        if node.name in ["today", "now"]:
+            return True
+        # some functions return a constant if the first argument is a constant
+        if node.name in [
+            "parseDateTime64BestEffortOrNull",
+            "toDateTime",
+            "toTimeZone",
+            "assumeNotNull",
+            "toIntervalYear",
+            "toIntervalMonth",
+            "toIntervalWeek",
+            "toIntervalDay",
+            "toIntervalHour",
+            "toIntervalMinute",
+            "toIntervalSecond",
+            "toStartOfDay",
+            "toStartOfWeek",
+            "toStartOfMonth",
+            "toStartOfQuarter",
+            "toStartOfYear",
+        ]:
+            return self.visit(node.args[0])
+
+        if node.name in ["minus", "add"]:
+            return all(self.visit(arg) for arg in node.args)
+
+        # otherwise we don't know, so return False
+        return False
+
+    def visit_field(self, node: ast.Field) -> bool:
+        return False
+
+    def visit_and(self, node: ast.And) -> bool:
+        return False
+
+    def visit_or(self, node: ast.Or) -> bool:
+        return False
+
+    def visit_not(self, node: ast.Not) -> bool:
+        return False
+
+    def visit_placeholder(self, node: ast.Placeholder) -> bool:
+        raise Exception()  # this should never happen, as placeholders should be resolved before this runs
+
+    def visit_alias(self, node: ast.Alias) -> bool:
+        return self.visit(node.expr)
+
+
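+# As an illustration of the visitor above (an editorial note, mirroring test_minus in the tests below):
+# `today() - 2` and `toIntervalDay(3)` count as time or interval constants, while `timestamp - 1` does
+# not, because `timestamp` resolves to a field rather than a constant.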
+def is_simple_timestamp_field_expression(expr: ast.Expr, context: HogQLContext) -> bool:
+    return IsSimpleTimestampFieldExpressionVisitor(context).visit(expr)
+
+
+@dataclass
+class IsSimpleTimestampFieldExpressionVisitor(Visitor[bool]):
+    context: HogQLContext
+
+    def visit_constant(self, node: ast.Constant) -> bool:
+        return False
+
+    def visit_field(self, node: ast.Field) -> bool:
+        if node.type and isinstance(node.type, ast.FieldType):
+            resolved_field = node.type.resolve_database_field(self.context)
+            if resolved_field and isinstance(resolved_field, DatabaseField):
+                return resolved_field.name in ["min_timestamp", "timestamp"]
+        # no type information, so just use the name of the field
+        return node.chain[-1] in ["min_timestamp", "timestamp"]
+
+    def visit_arithmetic_operation(self, node: ast.ArithmeticOperation) -> bool:
+        # only allow the min_timestamp field to be used on one side of the arithmetic operation
+        return (
+            (self.visit(node.left) and is_time_or_interval_constant(node.right))
+            or (self.visit(node.right) and is_time_or_interval_constant(node.left))
+        )
+
+    def visit_call(self, node: ast.Call) -> bool:
+        # some functions count as a timestamp field expression if their first argument is one
+        if node.name in [
+            "parseDateTime64BestEffortOrNull",
+            "toDateTime",
+            "toTimeZone",
+            "assumeNotNull",
+            "toStartOfDay",
+            "toStartOfWeek",
+            "toStartOfMonth",
+            "toStartOfQuarter",
+            "toStartOfYear",
+        ]:
+            return self.visit(node.args[0])
+
+        if node.name in ["minus", "add"]:
+            return self.visit_arithmetic_operation(
+                ast.ArithmeticOperation(
+                    op=ArithmeticOperationOp.Sub if node.name == "minus" else ArithmeticOperationOp.Add,
+                    left=node.args[0],
+                    right=node.args[1],
+                )
+            )
+
+        # otherwise we don't know, so return False
+        return False
+
+    def visit_compare_operation(self, node: ast.CompareOperation) -> bool:
+        return False
+
+    def visit_and(self, node: ast.And) -> bool:
+        return False
+
+    def visit_or(self, node: ast.Or) -> bool:
+        return False
+
+    def visit_not(self, node: ast.Not) -> bool:
+        return False
+
+    def visit_placeholder(self, node: ast.Placeholder) -> bool:
+        raise Exception()  # this should never happen, as placeholders should be resolved before this runs
+
+    def visit_alias(self, node: ast.Alias) -> bool:
+        from posthog.hogql.database.schema.events import EventsTable
+        from posthog.hogql.database.schema.sessions import SessionsTable
+
+        if node.type and isinstance(node.type, ast.FieldAliasType):
+            resolved_field = node.type.resolve_database_field(self.context)
+            table_type = node.type.resolve_table_type(self.context)
+            if not table_type or not resolved_field:
+                return False
+            return (
+                isinstance(table_type, ast.TableType)
+                and isinstance(table_type.table, EventsTable)
+                and resolved_field.name == "timestamp"
+            ) or (
+                isinstance(table_type, ast.LazyTableType)
+                and isinstance(table_type.table, SessionsTable)
+                and resolved_field.name == "min_timestamp"
+            )
+
+        return self.visit(node.expr)
+
+
+def rewrite_timestamp_field(expr: ast.Expr, context: HogQLContext) -> ast.Expr:
+    return RewriteTimestampFieldVisitor(context).visit(expr)
+
+
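+# As an illustration (an editorial note, mirroring test_real_example in the tests below):
+# `toTimeZone(timestamp, 'US/Pacific')` on the events table is rewritten by this visitor to
+# `toTimeZone(raw_sessions.min_timestamp, 'US/Pacific')`.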
+class RewriteTimestampFieldVisitor(CloningVisitor):
+    context: HogQLContext
+
+    def __init__(self, context: HogQLContext, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.context = context
+
+    def visit_field(self, node: ast.Field) -> ast.Field:
+        from posthog.hogql.database.schema.events import EventsTable
+        from posthog.hogql.database.schema.sessions import SessionsTable
+
+        if node.type and isinstance(node.type, ast.FieldType):
+            resolved_field = node.type.resolve_database_field(self.context)
+            table = node.type.resolve_table_type(self.context).table
+            if resolved_field and isinstance(resolved_field, DatabaseField):
+                if (isinstance(table, EventsTable) and resolved_field.name == "timestamp") or (
+                    isinstance(table, SessionsTable) and resolved_field.name == "min_timestamp"
+                ):
+                    return ast.Field(chain=["raw_sessions", "min_timestamp"])
+        # no type information, so just use the name of the field
+        if node.chain[-1] in ["min_timestamp", "timestamp"]:
+            return ast.Field(chain=["raw_sessions", "min_timestamp"])
+        return node
+
+    def visit_alias(self, node: ast.Alias) -> ast.Expr:
+        return self.visit(node.expr)
diff --git a/posthog/hogql/database/schema/util/test/test_session_where_clause_extractor.py b/posthog/hogql/database/schema/util/test/test_session_where_clause_extractor.py
new file mode 100644
index 0000000000000..bc5324e739ad9
--- /dev/null
+++ b/posthog/hogql/database/schema/util/test/test_session_where_clause_extractor.py
@@ -0,0 +1,284 @@
+from typing import Union, Optional, Dict
+
+from posthog.hogql import ast
+from posthog.hogql.context import HogQLContext
+from posthog.hogql.database.schema.util.session_where_clause_extractor import SessionMinTimestampWhereClauseExtractor
+from posthog.hogql.modifiers import create_default_modifiers_for_team
+from posthog.hogql.parser import parse_select, parse_expr
+from posthog.hogql.printer import prepare_ast_for_printing, print_prepared_ast
+from posthog.hogql.visitor import clone_expr
+from posthog.test.base import ClickhouseTestMixin, APIBaseTest
+
+
+def f(s: Union[str, ast.Expr, None], placeholders: Optional[dict[str, ast.Expr]] = None) -> Union[ast.Expr, None]:
+    if s is None:
+        return None
+    if isinstance(s, str):
+        expr = parse_expr(s, placeholders=placeholders)
+    else:
+        expr = s
+    return clone_expr(expr, clear_types=True, clear_locations=True)
+
+
+def parse(
+    s: str,
+    placeholders: Optional[Dict[str, ast.Expr]] = None,
+) -> ast.SelectQuery:
+    parsed = parse_select(s, placeholders=placeholders)
+    assert isinstance(parsed, ast.SelectQuery)
+    return parsed
+
+
+class TestSessionTimestampInliner(ClickhouseTestMixin, APIBaseTest):
+    @property
+    def inliner(self):
+        team = self.team
+        modifiers = create_default_modifiers_for_team(team)
+        context = HogQLContext(
+            team_id=team.pk,
+            team=team,
+            enable_select_queries=True,
+            modifiers=modifiers,
+        )
+        return SessionMinTimestampWhereClauseExtractor(context)
+
+    def test_handles_select_with_no_where_clause(self):
+        inner_where = self.inliner.get_inner_where(parse("SELECT * FROM sessions"))
+        assert inner_where is None
+
+    def test_handles_select_with_eq(self):
+        actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE min_timestamp = '2021-01-01'")))
+        expected = f(
+            "((raw_sessions.min_timestamp - toIntervalDay(3)) <= '2021-01-01') AND ((raw_sessions.min_timestamp + toIntervalDay(3)) >= '2021-01-01')"
+        )
+        assert expected == actual
+
+    def test_handles_select_with_eq_flipped(self):
+        actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE '2021-01-01' = min_timestamp")))
+        expected = f(
+            "((raw_sessions.min_timestamp - toIntervalDay(3)) <= '2021-01-01') AND ((raw_sessions.min_timestamp + toIntervalDay(3)) >= '2021-01-01')"
+        )
+        assert expected == actual
+
+    def test_handles_select_with_simple_gt(self):
+        actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE min_timestamp > '2021-01-01'")))
+        expected = f("((raw_sessions.min_timestamp + toIntervalDay(3)) >= '2021-01-01')")
+        assert expected == actual
+
+    def test_handles_select_with_simple_gte(self):
+        actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE min_timestamp >= '2021-01-01'")))
+        expected = f("((raw_sessions.min_timestamp + toIntervalDay(3)) >= '2021-01-01')")
+        assert expected == actual
+
+    def test_handles_select_with_simple_lt(self):
+        actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE min_timestamp < '2021-01-01'")))
+        expected = f("((raw_sessions.min_timestamp - toIntervalDay(3)) <= '2021-01-01')")
+        assert expected == actual
+
+    def test_handles_select_with_simple_lte(self):
+        actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE min_timestamp <= '2021-01-01'")))
+        expected = f("((raw_sessions.min_timestamp - toIntervalDay(3)) <= '2021-01-01')")
+        assert expected == actual
+
+    def test_select_with_placeholder(self):
+        actual = f(
+            self.inliner.get_inner_where(
+                parse(
+                    "SELECT * FROM sessions WHERE min_timestamp > {timestamp}",
+                    placeholders={"timestamp": ast.Constant(value="2021-01-01")},
+                )
+            )
+        )
+        expected = f("((raw_sessions.min_timestamp + toIntervalDay(3)) >= '2021-01-01')")
+        assert expected == actual
+
+    def test_unrelated_equals(self):
+        actual = self.inliner.get_inner_where(
+            parse("SELECT * FROM sessions WHERE initial_utm_campaign = initial_utm_source")
+        )
+        assert actual is None
+
+    def test_timestamp_and(self):
+        actual = f(
+            self.inliner.get_inner_where(
+                parse("SELECT * FROM sessions WHERE and(min_timestamp >= '2021-01-01', min_timestamp <= '2021-01-03')")
+            )
+        )
+        expected = f(
+            "((raw_sessions.min_timestamp + toIntervalDay(3)) >= '2021-01-01') AND ((raw_sessions.min_timestamp - toIntervalDay(3)) <= '2021-01-03')"
+        )
+        assert expected == actual
+
+    def test_timestamp_or(self):
+        actual = f(
+            self.inliner.get_inner_where(
+                parse("SELECT * FROM sessions WHERE or(min_timestamp <= '2021-01-01', min_timestamp >= '2021-01-03')")
+            )
+        )
+        expected = f(
+            "((raw_sessions.min_timestamp - toIntervalDay(3)) <= '2021-01-01') OR ((raw_sessions.min_timestamp + toIntervalDay(3)) >= '2021-01-03')"
+        )
+        assert expected == actual
+
+    def test_unrelated_function(self):
+        actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE like('a', 'b')")))
+        assert actual is None
+
+    def test_timestamp_unrelated_function(self):
+        actual = f(
+            self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE like(toString(min_timestamp), 'b')"))
+        )
+        assert actual is None
+
+    def test_timestamp_unrelated_function_timestamp(self):
+        actual = f(
+            self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE like(toString(min_timestamp), 'b')"))
+        )
+        assert actual is None
+
+    def test_ambiguous_or(self):
+        actual = f(
+            self.inliner.get_inner_where(
+                parse(
+                    "SELECT * FROM sessions WHERE or(min_timestamp > '2021-01-03', like(toString(min_timestamp), 'b'))"
+                )
+            )
+        )
+        assert actual is None
+
+    def test_ambiguous_and(self):
+        actual = f(
+            self.inliner.get_inner_where(
+                parse(
+                    "SELECT * FROM sessions WHERE and(min_timestamp > '2021-01-03', like(toString(min_timestamp), 'b'))"
+                )
+            )
+        )
+        assert actual == f("(raw_sessions.min_timestamp + toIntervalDay(3)) >= '2021-01-03'")
+
+    def test_join(self):
+        actual = f(
+            self.inliner.get_inner_where(
+                parse(
+                    "SELECT * FROM events JOIN sessions ON events.session_id = raw_sessions.session_id WHERE min_timestamp > '2021-01-03'"
+                )
+            )
+        )
+        expected = f("((raw_sessions.min_timestamp + toIntervalDay(3)) >= '2021-01-03')")
+        assert expected == actual
+
+    def test_join_using_events_timestamp_filter(self):
+        actual = f(
+            self.inliner.get_inner_where(
+                parse(
+                    "SELECT * FROM events JOIN sessions ON events.session_id = raw_sessions.session_id WHERE timestamp > '2021-01-03'"
+                )
+            )
+        )
+        expected = f("((raw_sessions.min_timestamp + toIntervalDay(3)) >= '2021-01-03')")
+        assert expected == actual
+
+    def test_minus(self):
+        actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE min_timestamp >= today() - 2")))
+        expected = f("((raw_sessions.min_timestamp + toIntervalDay(3)) >= (today() - 2))")
+        assert expected == actual
+
+    def test_minus_function(self):
+        actual = f(
+            self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE min_timestamp >= minus(today(), 2)"))
+        )
+        expected = f("((raw_sessions.min_timestamp + toIntervalDay(3)) >= minus(today(), 2))")
+        assert expected == actual
+
+    def test_real_example(self):
+        actual = f(
+            self.inliner.get_inner_where(
+                parse(
+                    "SELECT * FROM events JOIN sessions ON events.session_id = raw_sessions.session_id WHERE event = '$pageview' AND toTimeZone(timestamp, 'US/Pacific') >= toDateTime('2024-03-12 00:00:00', 'US/Pacific') AND toTimeZone(timestamp, 'US/Pacific') <= toDateTime('2024-03-19 23:59:59', 'US/Pacific')"
+                )
+            )
+        )
+        expected = f(
+            "(toTimeZone(raw_sessions.min_timestamp, 'US/Pacific') + toIntervalDay(3)) >= toDateTime('2024-03-12 00:00:00', 'US/Pacific') AND (toTimeZone(raw_sessions.min_timestamp, 'US/Pacific') - toIntervalDay(3)) <= toDateTime('2024-03-19 23:59:59', 'US/Pacific')"
+        )
+        assert expected == actual
+
+    def test_collapse_and(self):
+        actual = f(
+            self.inliner.get_inner_where(
+                parse(
+                    "SELECT * FROM sessions WHERE event = '$pageview' AND (TRUE AND (TRUE AND TRUE AND (timestamp >= '2024-03-12' AND TRUE)))"
+                )
+            )
+        )
+        expected = f("(raw_sessions.min_timestamp + toIntervalDay(3)) >= '2024-03-12'")
+        assert expected == actual
+
+
+class TestSessionsQueriesHogQLToClickhouse(ClickhouseTestMixin, APIBaseTest):
+    def print_query(self, query: str) -> str:
+        team = self.team
+        modifiers = create_default_modifiers_for_team(team)
+        context = HogQLContext(
+            team_id=team.pk,
+            team=team,
+            enable_select_queries=True,
+            modifiers=modifiers,
+        )
+        prepared_ast = prepare_ast_for_printing(node=parse(query), context=context, dialect="clickhouse")
+        pretty = print_prepared_ast(prepared_ast, context=context, dialect="clickhouse", pretty=True)
+        return pretty
+
+    def test_select_with_timestamp(self):
+        actual = self.print_query("SELECT session_id FROM sessions WHERE min_timestamp > '2021-01-01'")
+        expected = f"""SELECT
+    sessions.session_id AS session_id
+FROM
+    (SELECT
+        sessions.session_id AS session_id,
+        min(sessions.min_timestamp) AS min_timestamp
+    FROM
+        sessions
+    WHERE
+        and(equals(sessions.team_id, {self.team.id}), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, %(hogql_val_0)s), toIntervalDay(3)), %(hogql_val_1)s), 0))
+    GROUP BY
+        sessions.session_id,
+        sessions.session_id) AS sessions
+WHERE
+    ifNull(greater(toTimeZone(sessions.min_timestamp, %(hogql_val_2)s), %(hogql_val_3)s), 0)
+LIMIT 10000"""
+        assert expected == actual
+
+    def test_join_with_events(self):
+        actual = self.print_query(
+            """
+SELECT
+    sessions.session_id,
+    uniq(uuid)
+FROM events
+JOIN sessions
+ON events.$session_id = sessions.session_id
+WHERE events.timestamp > '2021-01-01'
+GROUP BY sessions.session_id
+"""
+        )
+        expected = f"""SELECT
+    sessions.session_id AS session_id,
+    uniq(events.uuid)
+FROM
+    events
+    JOIN (SELECT
+        sessions.session_id AS session_id
+
FROM + sessions + WHERE + and(equals(sessions.team_id, {self.team.id}), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, %(hogql_val_0)s), toIntervalDay(3)), %(hogql_val_1)s), 0)) + GROUP BY + sessions.session_id, + sessions.session_id) AS sessions ON equals(events.`$session_id`, sessions.session_id) +WHERE + and(equals(events.team_id, {self.team.id}), greater(toTimeZone(events.timestamp, %(hogql_val_2)s), %(hogql_val_3)s)) +GROUP BY + sessions.session_id +LIMIT 10000""" + assert expected == actual diff --git a/posthog/hogql/test/test_bytecode.py b/posthog/hogql/test/test_bytecode.py index cf0b8113b574d..f7d810700e74a 100644 --- a/posthog/hogql/test/test_bytecode.py +++ b/posthog/hogql/test/test_bytecode.py @@ -130,7 +130,7 @@ def test_bytecode_create(self): def test_bytecode_create_error(self): with self.assertRaises(NotImplementedException) as e: to_bytecode("(select 1)") - self.assertEqual(str(e.exception), "Visitor has no method visit_select_query") + self.assertEqual(str(e.exception), "BytecodeBuilder has no method visit_select_query") with self.assertRaises(NotImplementedException) as e: to_bytecode("1 in cohort 2") diff --git a/posthog/hogql/test/test_visitor.py b/posthog/hogql/test/test_visitor.py index 8aa6689328fbf..a01193f788d5f 100644 --- a/posthog/hogql/test/test_visitor.py +++ b/posthog/hogql/test/test_visitor.py @@ -125,7 +125,7 @@ def visit_arithmetic_operation(self, node: ast.ArithmeticOperation): with self.assertRaises(HogQLException) as e: UnknownNotDefinedVisitor().visit(parse_expr("1 + 3 / 'asd2'")) - self.assertEqual(str(e.exception), "Visitor has no method visit_constant") + self.assertEqual(str(e.exception), "UnknownNotDefinedVisitor has no method visit_constant") def test_hogql_exception_start_end(self): class EternalVisitor(TraversingVisitor): diff --git a/posthog/hogql/transforms/lazy_tables.py b/posthog/hogql/transforms/lazy_tables.py index bdbb322d54397..df8ce6962259c 100644 --- a/posthog/hogql/transforms/lazy_tables.py +++ b/posthog/hogql/transforms/lazy_tables.py @@ -309,7 +309,7 @@ def create_override(table_name: str, field_chain: List[str | int]) -> None: # For all the collected tables, create the subqueries, and add them to the table. 
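        # Note: lazy_select now receives the full HogQLContext and the outer select node instead of just
        # the modifiers, so a lazy table such as SessionsTable can derive an inner WHERE clause from the
        # outer query (see SessionMinTimestampWhereClauseExtractor above).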
for table_name, table_to_add in tables_to_add.items(): - subquery = table_to_add.lazy_table.lazy_select(table_to_add.fields_accessed, self.context.modifiers) + subquery = table_to_add.lazy_table.lazy_select(table_to_add.fields_accessed, self.context, node=node) subquery = cast(ast.SelectQuery, clone_expr(subquery, clear_locations=True)) subquery = cast(ast.SelectQuery, resolve_types(subquery, self.context, self.dialect, [node.type])) old_table_type = select_type.tables[table_name] diff --git a/posthog/hogql/visitor.py b/posthog/hogql/visitor.py index c11856169297f..2bf968abf2ab0 100644 --- a/posthog/hogql/visitor.py +++ b/posthog/hogql/visitor.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, TypeVar, Generic, Any from posthog.hogql import ast from posthog.hogql.base import AST, Expr @@ -14,8 +14,11 @@ def clear_locations(expr: Expr) -> Expr: return CloningVisitor(clear_locations=True).visit(expr) -class Visitor(object): - def visit(self, node: AST): +T = TypeVar("T") + + +class Visitor(Generic[T]): + def visit(self, node: AST) -> T: if node is None: return node @@ -28,7 +31,7 @@ def visit(self, node: AST): raise e -class TraversingVisitor(Visitor): +class TraversingVisitor(Visitor[None]): """Visitor that traverses the AST tree without returning anything""" def visit_expr(self, node: Expr): @@ -258,7 +261,7 @@ def visit_hogqlx_attribute(self, node: ast.HogQLXAttribute): self.visit(node.value) -class CloningVisitor(Visitor): +class CloningVisitor(Visitor[Any]): """Visitor that traverses and clones the AST tree. Clears types.""" def __init__( From 5ae6b301208b766523dc22d33f3938f1b70eb890 Mon Sep 17 00:00:00 2001 From: Eric Duong Date: Fri, 22 Mar 2024 04:39:20 -0400 Subject: [PATCH 08/15] chore(data-warehouse): join UI cleanup (#21085) * add loading indicator * add custom settings * update sizing --- .../scenes/data-warehouse/ViewLinkModal.tsx | 34 ++++++++++++++----- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/frontend/src/scenes/data-warehouse/ViewLinkModal.tsx b/frontend/src/scenes/data-warehouse/ViewLinkModal.tsx index 2116d2da6e74d..11f50ca4f0d27 100644 --- a/frontend/src/scenes/data-warehouse/ViewLinkModal.tsx +++ b/frontend/src/scenes/data-warehouse/ViewLinkModal.tsx @@ -1,6 +1,6 @@ import './ViewLinkModal.scss' -import { IconTrash } from '@posthog/icons' +import { IconCollapse, IconExpand, IconTrash } from '@posthog/icons' import { LemonButton, LemonDivider, @@ -35,7 +35,7 @@ export function ViewLinkModal(): JSX.Element { } isOpen={isJoinTableModalOpen} onClose={toggleJoinTableModal} - width={600} + width={700} > @@ -57,6 +57,7 @@ export function ViewLinkForm(): JSX.Element { selectedJoiningKey, sourceIsUsingHogQLExpression, joiningIsUsingHogQLExpression, + isViewLinkSubmitting, } = useValues(viewLinkLogic) const { selectJoiningTable, @@ -66,12 +67,13 @@ export function ViewLinkForm(): JSX.Element { selectSourceKey, selectJoiningKey, } = useActions(viewLinkLogic) + const [advancedSettingsExpanded, setAdvancedSettingsExpanded] = useState(false) return (
[The remaining ViewLinkForm JSX hunks are garbled in extraction. What is recoverable from the fragments: the Source Table, Joining Table, Source Table Key, and Joining Table Key selectors keep their existing behaviour; the SQL code snippet is moved behind a new collapsible "Advanced settings" section ("Customize how the fields are accessed"), toggled by a button with IconCollapse/IconExpand side icons and the advancedSettingsExpanded state; and the Save button now reflects the isViewLinkSubmitting loading state.]
From 5f4cb5486187e4167b53b1131e2272c7fcf9aae9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Far=C3=ADas=20Santana?=
Date: Fri, 22 Mar 2024 09:58:51 +0100
Subject: [PATCH 09/15] fix: Track last updated at for batch export run
 (#21082)

---
 posthog/batch_exports/service.py                | 16 +++++++++++++---
 posthog/temporal/batch_exports/batch_exports.py |  4 ++--
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/posthog/batch_exports/service.py b/posthog/batch_exports/service.py
index c26be9a77ed1a..b00f0f4c98c69 100644
--- a/posthog/batch_exports/service.py
+++ b/posthog/batch_exports/service.py
@@ -439,8 +439,11 @@ def create_batch_export_run(
     return run


-def update_batch_export_run_status(
-    run_id: UUID, status: str, latest_error: str | None, records_completed: int = 0
+def update_batch_export_run(
+    run_id: UUID,
+    status: str,
+    latest_error: str | None,
+    records_completed: int = 0,
 ) -> BatchExportRun:
     """Update the status of a BatchExportRun with the given id.
@@ -448,7 +451,14 @@ def update_batch_export_run(
         id: The id of the BatchExportRun to update.
     """
     model = BatchExportRun.objects.filter(id=run_id)
-    updated = model.update(status=status, latest_error=latest_error, records_completed=records_completed)
+    updated_at = dt.datetime.now()
+
+    updated = model.update(
+        status=status,
+        latest_error=latest_error,
+        records_completed=records_completed,
+        last_updated_at=updated_at,
+    )

     if not updated:
         raise ValueError(f"BatchExportRun with id {run_id} not found.")
diff --git a/posthog/temporal/batch_exports/batch_exports.py b/posthog/temporal/batch_exports/batch_exports.py
index c776e1f245ef3..c40950c654426 100644
--- a/posthog/temporal/batch_exports/batch_exports.py
+++ b/posthog/temporal/batch_exports/batch_exports.py
@@ -22,7 +22,7 @@
     create_batch_export_backfill,
     create_batch_export_run,
     update_batch_export_backfill_status,
-    update_batch_export_run_status,
+    update_batch_export_run,
 )
 from posthog.temporal.batch_exports.metrics import (
     get_export_finished_metric,
@@ -542,7 +542,7 @@ async def update_export_run_status(inputs: UpdateBatchExportRunStatusInputs) ->
     """Activity that updates the status of a BatchExportRun."""
     logger = await bind_temporal_worker_logger(team_id=inputs.team_id)

-    batch_export_run = await sync_to_async(update_batch_export_run_status)(
+    batch_export_run = await sync_to_async(update_batch_export_run)(
         run_id=uuid.UUID(inputs.id),
         status=inputs.status,
         latest_error=inputs.latest_error,

From 6b6d40a66607adc55f461b05de4c4057255f5c59 Mon Sep 17 00:00:00 2001
From: David Newell
Date: Fri, 22 Mar 2024 10:20:37 +0000
Subject: [PATCH 10/15] feat: sparkline errors (#21081)

* feat: errors page playlist link
* feat: create playlist from errors
* remove sample data
* update title
* add frontend protection to samples
* feat: sparkline errors
---
 ee/session_recordings/ai/error_clustering.py | 15 ++++++++++++---
 .../errors/SessionRecordingErrors.tsx        | 12 ++++++++++++
 frontend/src/types.ts                        |  1 +
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/ee/session_recordings/ai/error_clustering.py b/ee/session_recordings/ai/error_clustering.py
index edc06ff471355..7a3c12c44dec0 100644
--- a/ee/session_recordings/ai/error_clustering.py
+++ b/ee/session_recordings/ai/error_clustering.py
@@ -6,6 +6,7 @@
 import pandas as pd
 import numpy as np
 from posthog.session_recordings.models.session_recording_event import SessionRecordingViewed
+from datetime import date

 CLUSTER_REPLAY_ERRORS_TIMING = Histogram(
     "posthog_session_recordings_cluster_replay_errors",
@@ -30,7
+31,7 @@ def error_clustering(team: Team, user: User): if not results: return [] - df = pd.DataFrame(results, columns=["session_id", "input", "embeddings"]) + df = pd.DataFrame(results, columns=["session_id", "error", "embeddings", "timestamp"]) df["cluster"] = cluster_embeddings(df["embeddings"].tolist()) @@ -42,7 +43,7 @@ def error_clustering(team: Team, user: User): def fetch_error_embeddings(team_id: int): query = """ SELECT - session_id, input, embeddings + session_id, input, embeddings, generation_timestamp FROM session_replay_embeddings WHERE @@ -76,13 +77,21 @@ def construct_response(df: pd.DataFrame, team: Team, user: User): clusters = [] for cluster, rows in df.groupby("cluster"): session_ids = rows["session_id"].unique() - sample = rows.sample(n=1)[["session_id", "input"]].rename(columns={"input": "error"}).to_dict("records")[0] + sample = rows.sample(n=1)[["session_id", "error"]].to_dict("records")[0] + + date_series = ( + df.groupby([df["timestamp"].dt.date]) + .size() + .reindex(pd.date_range(end=date.today(), periods=7), fill_value=0) + ) + sparkline = dict(zip(date_series.index.astype(str), date_series)) clusters.append( { "cluster": cluster, "sample": sample.get("error"), "session_ids": np.random.choice(session_ids, size=DBSCAN_MIN_SAMPLES - 1), "occurrences": rows.size, + "sparkline": sparkline, "unique_sessions": len(session_ids), "viewed": len(np.intersect1d(session_ids, viewed_session_ids, assume_unique=True)), } diff --git a/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx b/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx index 4b2dd2d1abed3..8b73fbcc1f924 100644 --- a/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx +++ b/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx @@ -2,6 +2,7 @@ import { IconFeatures } from '@posthog/icons' import { LemonButton, LemonTable, LemonTabs, Spinner } from '@posthog/lemon-ui' import { useActions, useValues } from 'kea' import { JSONViewer } from 'lib/components/JSONViewer' +import { Sparkline } from 'lib/lemon-ui/Sparkline' import { useState } from 'react' import { urls } from 'scenes/urls' @@ -45,6 +46,17 @@ export function SessionRecordingErrors(): JSX.Element { }, width: '50%', }, + { + title: '', + render: (_, cluster) => { + return ( + + ) + }, + }, { title: 'Occurrences', dataIndex: 'occurrences', diff --git a/frontend/src/types.ts b/frontend/src/types.ts index a7f65e84c5473..be96494253ae8 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -906,6 +906,7 @@ export type ErrorCluster = { sample: string occurrences: number session_ids: string[] + sparkline: Record unique_sessions: number viewed: number } From 3a995a5f9ecad0b1463d6e2f9818538946175d64 Mon Sep 17 00:00:00 2001 From: PostHog Bot <69588470+posthog-bot@users.noreply.github.com> Date: Fri, 22 Mar 2024 07:11:01 -0400 Subject: [PATCH 11/15] chore(deps): Update posthog-js to 1.116.4 (#21098) --- package.json | 2 +- pnpm-lock.yaml | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/package.json b/package.json index 79946a087fbf4..2128fa3207a76 100644 --- a/package.json +++ b/package.json @@ -145,7 +145,7 @@ "pmtiles": "^2.11.0", "postcss": "^8.4.31", "postcss-preset-env": "^9.3.0", - "posthog-js": "1.116.3", + "posthog-js": "1.116.4", "posthog-js-lite": "2.5.0", "prettier": "^2.8.8", "prop-types": "^15.7.2", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7fc1d7cbd22a4..04806adc67be8 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml 
@@ -254,8 +254,8 @@ dependencies:
         specifier: ^9.3.0
         version: 9.3.0(postcss@8.4.31)
       posthog-js:
-        specifier: 1.116.3
-        version: 1.116.3
+        specifier: 1.116.4
+        version: 1.116.4
       posthog-js-lite:
         specifier: 2.5.0
         version: 2.5.0
@@ -6796,7 +6796,7 @@ packages:
       '@storybook/csf': 0.1.3
       '@storybook/global': 5.0.0
       '@storybook/types': 7.6.17
-      '@types/qs': 6.9.13
+      '@types/qs': 6.9.14
       dequal: 2.0.3
       lodash: 4.17.21
       memoizerific: 1.11.3
@@ -8200,8 +8200,8 @@ packages:
     resolution: {integrity: sha512-bZcOkJ6uWrL0Qb2NAWKa7TBU+mJHPzhx9jjLL1KHF+XpzEcR7EXHvjbHlGtR/IsP1vyPrehuS6XqkmaePy//mg==}
     dev: false

-  /@types/qs@6.9.13:
-    resolution: {integrity: sha512-iLR+1vTTJ3p0QaOUq6ACbY1mzKTODFDT/XedZI8BksOotFmL4ForwDfRQ/DZeuTHR7/2i4lI1D203gdfxuqTlA==}
+  /@types/qs@6.9.14:
+    resolution: {integrity: sha512-5khscbd3SwWMhFqylJBLQ0zIu7c1K6Vz0uBIt915BI3zV0q1nfjRQD3RqSBcPaO6PHEF4ov/t9y89fSiyThlPA==}
     dev: true

   /@types/query-selector-shadow-dom@1.0.0:
@@ -17454,8 +17454,8 @@ packages:
     resolution: {integrity: sha512-Urvlp0Vu9h3td0BVFWt0QXFJDoOZcaAD83XM9d91NKMKTVPZtfU0ysoxstIf5mw/ce9ZfuMgpWPaagrZI4rmSg==}
     dev: false

-  /posthog-js@1.116.3:
-    resolution: {integrity: sha512-KakGsQ8rS/K/U5Q/tiBrRrFRCgGrR0oI9VSYw9hwNCY00EClwAU3EuykUuQTFdQ1EuYMrZDIMWDD4NW6zgf7wQ==}
+  /posthog-js@1.116.4:
+    resolution: {integrity: sha512-PZg208/k5OZRQbd9tnGvUgtyRl1IAYyyh74teyIDIH3EnlsAolBlVM4gcoyEYoVkUi5sZLKitj9gTX3/vnEG4Q==}
     dependencies:
       fflate: 0.4.8
       preact: 10.20.0

From d5cc9e43b9e940faa856be6504932569f8f8e1f8 Mon Sep 17 00:00:00 2001
From: Ben White
Date: Fri, 22 Mar 2024 12:28:09 +0100
Subject: [PATCH 12/15] fix: Toolbar better auth issue (#21096)

---
 ...s-other-toolbar--unauthenticated--dark.png | Bin 10791 -> 12371 bytes
 ...-other-toolbar--unauthenticated--light.png | Bin 12070 -> 13639 bytes
 frontend/src/toolbar/bar/Toolbar.scss         |  4 -
 frontend/src/toolbar/bar/Toolbar.tsx          | 39 ++++++--
 frontend/src/toolbar/bar/ToolbarButton.scss   | 11 +++
 frontend/src/toolbar/bar/ToolbarButton.tsx    | 13 ++-
 .../src/toolbar/flags/flagsToolbarLogic.ts    |  5 -
 frontend/src/toolbar/toolbarConfigLogic.ts    |  6 +-
 frontend/src/toolbar/toolbarLogic.ts          | 87 ------------------
 9 files changed, 53 insertions(+), 112 deletions(-)
 delete mode 100644 frontend/src/toolbar/toolbarLogic.ts

diff --git a/frontend/__snapshots__/scenes-other-toolbar--unauthenticated--dark.png b/frontend/__snapshots__/scenes-other-toolbar--unauthenticated--dark.png
index f5283e47bdf438cc2f95a77a1b24ccc90adc38b9..dc149beabe2037e498026227fe248d0fa4cac62c 100644
GIT binary patch
[binary PNG snapshot data omitted]
diff --git a/frontend/__snapshots__/scenes-other-toolbar--unauthenticated--light.png b/frontend/__snapshots__/scenes-other-toolbar--unauthenticated--light.png
index 15b96030b8a254d62484aa97934a3972fea6d9b8..ace3a2b7f937317a2e0525ef4a80cbf46ae0220b 100644
GIT binary patch
[binary PNG snapshot data omitted]
zk(15bIeWG287ziON6mks98cVaTH+-RiahzP{zZ97GN}xWJ$bWf)upIw-8JdvJ3nep zvpzc$Ie7i?B1(si`oU2EAYFv7kyH(*XcC!`DE_D*7KG!7s7_{t7>z)HU@*k=Oy4(S z=ne-Fg9tP1!RsRNN+Kq3Ezc2J=tQV=O!odL-e{`^_jDHxb4~D9j)7RJ=T{OIJtA2& zOwu+A`VM|=y5a?Zu7_su_oCAo82WvreLfp?0+TW?`5tF}+8(XoRdCsVO=xSx{PR-2 zmGV4Hn^mspQpCPhm5`9I)SR1}3tbztEW0)Sx*o%f<>aYV(cWIl=MdD5122lAzU=A= zrH3q0aYU)9wHoi~0!2oKuDto>zVRcn<0Q4xiV_kPSHnD%zXCm}5DUSWjqX05) z{eFMH+%;M>i=+!DkRqo$kftFq6Q7r`n$DbW&m)+00j8eri6Oo4M8z!Ch=`;#CZ9E2 z-sk9QK(f<8jI3mP&@j9bPrr5@;)P^W@$$k*EFA_ylKUZ1EfdBcfB_4to|~X?;0jV8 zW3)yNhh*=x&r0S}&5f8M4@vf`)!vSi@oFMvn`9u^y%X`Gjd<69$XLc7g3tJw*HzLv znM68eY3Ln8ril~{Gt;O6en3=H8T2nwOV%`5tgu|lK>7vOE%FztTo+F*_MJnas9c1tuQaF}lN%!ItU87*NMwQN!;#X8uT!LYC;eUKb*LOe|)04H4Myfr^=?#=c&X?nh4UfUZyWoC;WdTR*N8(n2KE z8XJwSPAm{*WaM3e}{F8gG<`qk1oghZc`h?Y|9W4_JS zvYlrPSd-eGPz}#D@yZ}_MvcqcMx!;GOS@jELQfUtg+}K^-3T@u4Nq|koqI?M-t!e; ze)0PZj)D12Rg^gqDw8OCPsAHhH|~znsbiA^!}|w5+zjIxIQh1%XibT^dL6EeG9R<` z)|;UVehF~l8dKPh3;Sl{5c=t53U{@~ekPY4W35|1d#{jb{`KV5W2tW8EA%k&Zu<^% zSq}rr8m^+QG-D2`!#EQLL;-jp0nU&UIX{OKuV`_qh!lev&*jPmB3sU~Oy(ZRya~{$ z#>86dcF;^VT2Ke1zVfDh@ZOQg@@Dk36Zz#tqWQr@Q_d|zfKh_V${p_emzRh#mg?A2 z5V-Sb2CxSG2~kfNgHENLY>qMJ&d_L0DwLC2d8ZGnuQj0Mm)Kfn$nc@zVa^x5oIimx ze=H<+xPM)s?w0=HW;5SaprM|bBU-96w^o&QjjZLf;G0N!e>=_#?Al9UIacxRd&!f1P1$7#C|N*Go&4C({6ufvr+_gLP=Lfu<0{}lsRsPutRV`( z5O*(g!*=lhjrtpY1jnsMzTda_Xm#BERBzqaXsZ{h^DyvETx#QH3-IFqs+~3*06+fE zg@0yL+5-Veo?h(Sc_5>yl>u?m2OG<8jW=QpNQ9taD@`g0LY;ttGU9!7X9HdrZK{;^ z7CW%;VV`$cd-vEH1qGnMIhW6UJps{msW_T)#J0^-%1L*-{c&Rrfa1{QkBT!d^rlY~rWXLgLAPV0zC9Il(kX5QSdf508atm@>3M{<9^MO!4hM+v zyBB*t1oiK})w1w`#L-Annruwbyan)S z`K_+kSB2}~d?#{x)#R=LEFS>x^xOP|`2>u6)oA2!P7J3t(;R~)Oo4@@(MH5jh5z84 zW;O1#5e2HK9&_85_P?mUP0Ep}=bqvljcN=a@0^MiuA)W8fc*5EE?cA$cfatc-@)57CK)D_ID#2mhZ*10E?a|2_xBVQhSI4EAd_P7cc5DbCfmxH}=~(G%5mdT15yn8yOwE@Ng4gzfW?Xz$>kwCGC5Trs3Vvuf($$p&q)Qd3 zU?i*@m`uyY&UEE-VDdPusW+n@Ny7Iz6<-OR{p;}&o9WK%bWNao zK_@-eBcIXtqjmr!Wz$G-!DXQQReK5WTwTzXX&M%b1w!%q5`vO_9Q%VC&XGHCGtp;h zqz3x!iAJ>T^`(V{QsmAgr+d-OI@`gs^s)rdk6NrOXGF867$|lpK#d34JkUX)<5|Jg z+J+I4k%l98iyevzFBf|^z1RyCFE?fn%}E4I^O;1@xE;ML zWD&5H{Vv)F2GUPZ29Pw+x=a&9u%N7jKsNT23F+KE0@{Eg>?33i&28Cv#9J_mpsmjJ zgpEef`|4FzSRR7fQUD1+h-|K7yP4kk_F1zP*tY?a2mQ&%WV8o9wMGZ91@g#o6i5=y zs$mxZ{WYrSwecDtn{mEcDo=@+GIWBOa8?zy@ByrlMg>(w;>;6`gciW7X`m5E;1F{G zIt1PA9)5`c=f&@b8P_e>5QAF zeA-@)#Be*2lS$U000Ev_Zd9fbDU!w4^7OXt4-R9s0OR7E?g2ne@m?7CkO>?nAccuz zggscQc%92_G9sUJD7?^$7A+c$H6;Re0vUbk!A`ptpu>`iBQlKX?g)1RWWa|r*$NuM z?Mg(S23TM$q3dDgph138+z4 z8O=;I3C0IvFk7G;bxSAc)i3qhr>2N3SfL`=o!YLKisVjLhA##9t`1tTwnr)qKS|Q| zGtus8+ZrMtRFp~EAqV38QQ{*F$m8zzEtq)5@JOVhn@=SQy7IyAy~?dsPkjx^i%noa z-(%)-3W9HWLiSBJAWGyAcLv^GIcAVZ)MoHvaE?b+k19Ni1H>RW+|-6KuL-6X)`$x& zP-w7PAvms;?2>GH7IWwTaNhzBFNU1SD6WgoGg6Ur3iovR;uXSAGF~Bstf|Z*asL?Q z;?Sc;1mzZegvt)>jNqT@e#=6hXZ zuisXa%aAKv;VU8JN7brXu+G4eu24W6^2n||0pgal=ESbt^)#-r%YrUo?OhcnEU{Ds z0XPTtNl*v}Z;>DK`ZR2DmN3KlXw|Sm=y@!4*K-VvnId;+%;LdWKn74eDfd3@t`)Bj zMY^OAGk0f#=3k+Qd!3mbyXsb;z1{Ix>Z5#NbZ}AsAD?^~U{tn%@9l;REZCat`$BP% zyZy0z8)rdG@mt|O@9FvwZlKz_+|4jM1cde0>N{|sP6PV88ao_(i=m3#!@o6lC2K%> zKF6EQ$FHGoEKTg~>d)i#4E523k1mWrN^wDt$zYoA73BHlPHEx?UZ)Jk+ zp@eb)N2+)uhOk!}{f=mQPaTl9Xb;GQ3>0+DmPOMgC5|OX>BHTpeB%uqMbb2{j*JgL z|KW=gEyT0)#Rd@7UbB_G1`yG^-T6Y9=J|A8|4bG1b|Vezgg>mCyN{dn@ftno5s;pl zO$PyCNCp1jUb$0}5u5u@wBJxY%@hbRtU*%nKFxw7qg?F0P%w--YSh9H(t*(Dx`GqGC%l zJO)y&R|ZN*Na?PGeePj+_N+M_;)WDHrU-k`tXu>WY^ejl;y?yoMAP$G`y^V%IMu#@ zO8U`;ympNx(75gAOJI;63=bJ(N#P1(ZGE#N_*)9Aqre@=c3fKRmOn`T6c`qv<6!%7c9P3apEwavXJ4#k+g`DzAKQ<0FCyCW@X^=qHDt&c0?X`={ck3=jLv1z-UVzkc#JH>QG|wnuK@=opW*_xF4g1tZpdbuIqK%Mzo<)Fu9pNEFJBx{ 
zcq6*=&cCFBhazSELMhw?W(;88fqn&i-ale6$RCc^-&5`@IZTnUnSR}86_2!AZ?MCn z-G9glw`hu20XWZ~eMslquKe<5^QAnXfis1X2{7FFkQjX$P}8-Wmi70sV)%QM1{O|x z0h2>q&Yx!}yvZYv`u}d(`$#1yif#icOxh$A}%!GLiY@D`mTL$xEA~8syYlvL^a3B#$7QbH9@q z(Tu4f(7p(D{#n{}j zJa*h}dchVvi&Ilmvq_flnMNjy^$vN_^Tii{-eX=2r#Lm#6lb*^3^zv`-)C*8F9TyO z-9t308a;5ZhMQo*B~nJy&BaonNps)q%r89@ggZjPbh$zP)XgSAPk&Wus+5kAoYt#f zzL7i$a4{-fx>z4r)_NpUJZ`BUI@yw1i!BiyIRI#Yyqwv!7eYj{`kgCoUy_Q8%x9K0 z%#zimV?tBOsy;HfD-5W#d7QtLV^2`lJ4ahNb7ZaD@Lzw{ae3Kk#5}I1mMf_eyt_o$ zcP}5DF7EEtJohspl~i6S%8xlE!||)RfdG?!*B4-HVv;U9j>-uzP|H;tZ8tGC=6v3n zIIYGW@Qv8t_G$#hE_*fbP80@&=z+z>MSnQ_ysNA0(C|>oIiGdzIRU4Q<70EV z_)0CNHcuc9VK#$0OFbI-`{G`Aznc<&r@_|;HZ|mZewW&a{Jw{@_gmv)MK8T716E-K zJAL4RhULC)x%d4MlR!g`aOm&`yc>z?7Y`TlTtWZEr5V1Fdq4PRFMq zcL2Th6f)q1`?L3`-4=Uo>-?~x4g~m=1wgXVw2rlNvqGZ)~`Gucs`@zkRX=iQi*Yv{qrt@+;hwC!Y zq<&*R@NmPj{*AYYSC@<5xZwD%X>aHpJTEQ&TuGtJVysRB(CRawyJ^$ROw5&Gs}@a( zo>~A8>1;`|L_g8-YfUZy-hW)mLr%|yM}~U?%w!(+jbO6Ymx$p7lMnp_?o>tEKP`oj zf_W8jZJtv}Uu~j^iHS%|9Fmv<;3~--CBTSlHOJAxmw*QU!|=$+(xpZ?GAL*f*;iua z6aNA3-7)*2_{y>GpeoRDmPQShofE)evTQQ|3|Gc=?;QXa@-?7l{7$8nLg92JM3lH8 z5P%?nUy!}^TWg154cv9f^ggr;x|tqdb<|r^l1h~1WUURB5=;_$~wCs@Q36)f)DZu z3%d2a-w5~@hr>06f0G_O!LyR_NV?<4%!MD}|CtGRNO94S4h;=OkFMowm-c6W8=fr( zeSQmO`4a^Gg=9tR@tgFd`PfvN2(PAGc{i7UfmwR7XNiF-03E)}7CyjvzuH4by!6I( z#f|Pbk-GFtV|!X(o^ZHB$=po_mV>W&6}_7q$U=rvHQFosm+g)z6$Q!E-UN)EBs`?R z>GlFN^!AxqlYmnZc&3y723;)Q6+Y=nV#y3@_a6YBhv15UO2C3HVRZ|TuC742UCUOe zKOTJ50oO}e@?^{S%Kpf??l_%-5KM`|Wx_ZQQulg>Lx;zUSt3U79{&Nv_oe{OuVsUm zeh*sFh2dFAe@g+dcdl|sPoa<=A=e|RZ__y#1%dp~3w{@%f5Op;?Gj^P=@HGGBL>AK zVdP^y@~1w5=2*aQ)$EAd`}t_YkVkEkXQj&`ne0ycG2=*x_)^H=MU~m;7>Sh}e+y>L zpf+S|?vs2Fm`dXM`!37fesw91cgU-NmK$4#Uva{bD@b@;Rm7K%_`|9D6J1ZQDZO#Z zIWN2D=Y2U`kzpud(fx^aS6g{%xXBw-$cUJ9TD82 z1rcv-4h$bj-Ny=tmF`~Ol(jT`RH}4Z4cxT2zxn{~pZzqh_>R`tT}C##OAoeRS$3d(T6kg>Nwf z*frzd{MX8st2yx3GNz8C-8=D zpBy+r@0c}P0a54`1qb;2AEeD0`!wW5W>koVJX}ZphkRM>cEfrlUvIDS?_%Hm^u9~! z?7Z!8(0?|-VeFdvF7(S+gI&>jM&i^^GPs0uy_e-q*pBR~W}IfYIG|O&P@K+H?CW?r z>YJ`K9RW%_a%|t&aC)y?k5>TO!blKR@O#O}k>hVN1>1eq{=R>HQ^V;Uat>Mwg;p5> z)BEB&_dR3D<2$}qow8cpf&pb3t=j>a%`v?G5@K+^m%+d*Bh_#8cGAg~wh=>6z_O%3 z-YebL(reQm3B5x|P|yzTAbjS6Xt{Xr`VL6l_9F`XSbLb|$pXYx*Ib83-w$?B<1t9& z-dk<#XTi^Xa&`j3aiQDWAp<>LMdEJk*mt6Wzpf6}GUX4>{sr#cSnAtq$hE{ZFl9Ap zUmCv11}-3w_w$?M!13Yt6%|s{Qv91Zixe>{(Tr?@$ugc`|sDBHXHy^@!wYc zzaaVV`;LD>@-ImKJsAIH3?MxIn=$^kFUJ0xG4B1FCI8Km|4+WXAXB{cV4L0H%e8JG PwX*r$?l=6eUibb7qA3my diff --git a/frontend/src/toolbar/bar/Toolbar.scss b/frontend/src/toolbar/bar/Toolbar.scss index ff83b7c9d922a..ec98167332fbd 100644 --- a/frontend/src/toolbar/bar/Toolbar.scss +++ b/frontend/src/toolbar/bar/Toolbar.scss @@ -144,8 +144,4 @@ transform: var(--toolbar-translate) scale(0); } } - - &--unauthenticated { - width: calc(5rem + 1px); // Account for border - } } diff --git a/frontend/src/toolbar/bar/Toolbar.tsx b/frontend/src/toolbar/bar/Toolbar.tsx index f255ccc26800d..8ed031f904d86 100644 --- a/frontend/src/toolbar/bar/Toolbar.tsx +++ b/frontend/src/toolbar/bar/Toolbar.tsx @@ -77,15 +77,18 @@ function MoreMenu(): JSX.Element { } maxContentWidth={true} > - } title="More options" /> + + + ) } -export function ToolbarInfoMenu(): JSX.Element { +export function ToolbarInfoMenu(): JSX.Element | null { const ref = useRef(null) const { visibleMenu, isDragging, menuProperties, minimized, isBlurred } = useValues(toolbarLogic) const { setMenu } = useActions(toolbarLogic) + const { isAuthenticated } = useValues(toolbarConfigLogic) const content = minimized ? null : visibleMenu === 'flags' ? 
( @@ -102,6 +105,10 @@ export function ToolbarInfoMenu(): JSX.Element { return () => setMenu(null) }, [ref.current]) + if (!isAuthenticated) { + return null + } + return (
} onClick={isAuthenticated ? toggleMinimized : authenticate} title={isAuthenticated ? 'Minimize' : 'Authenticate the PostHog Toolbar'} titleMinimized={isAuthenticated ? 'Expand the toolbar' : 'Authenticate the PostHog Toolbar'} - /> + > + + {isAuthenticated ? ( <> - } menuId="inspect" /> - } menuId="heatmap" /> - } menuId="actions" /> - } menuId="flags" title="Feature flags" /> + + + + + + + + + + + + - ) : null} + ) : ( + + Authenticate + + )}
diff --git a/frontend/src/toolbar/bar/ToolbarButton.scss b/frontend/src/toolbar/bar/ToolbarButton.scss index 0d0bb666fa540..ce480f3fbab35 100644 --- a/frontend/src/toolbar/bar/ToolbarButton.scss +++ b/frontend/src/toolbar/bar/ToolbarButton.scss @@ -15,6 +15,8 @@ width: 2rem; height: 2rem; min-height: var(--lemon-button-height); + margin: 0.25rem; + font-weight: 600; color: var(--muted-alt); appearance: none !important; // Important as this gets overridden by Ant styles... cursor: pointer; @@ -43,4 +45,13 @@ } } } + + &--flex { + flex-grow: 1; + width: auto; + + button { + width: 100%; + } + } } diff --git a/frontend/src/toolbar/bar/ToolbarButton.tsx b/frontend/src/toolbar/bar/ToolbarButton.tsx index add0e5f2580ce..f5dfc755be469 100644 --- a/frontend/src/toolbar/bar/ToolbarButton.tsx +++ b/frontend/src/toolbar/bar/ToolbarButton.tsx @@ -10,17 +10,18 @@ import React from 'react' import { MenuState, toolbarLogic } from './toolbarLogic' export type ToolbarButtonProps = { - icon: React.ReactElement | null + children: React.ReactNode onClick?: () => void title?: string titleMinimized?: JSX.Element | string menuId?: MenuState + flex?: boolean } export const ToolbarButton: FunctionComponent = React.forwardRef< HTMLDivElement, ToolbarButtonProps ->(({ icon, title, onClick, titleMinimized, menuId, ...props }, ref): JSX.Element => { +>(({ children, title, onClick, titleMinimized, menuId, flex, ...props }, ref): JSX.Element => { const { visibleMenu, minimized, isDragging } = useValues(toolbarLogic) const { setVisibleMenu } = useActions(toolbarLogic) @@ -54,9 +55,13 @@ export const ToolbarButton: FunctionComponent = React.forwar } const theButton = ( -
+
) diff --git a/frontend/src/toolbar/flags/flagsToolbarLogic.ts b/frontend/src/toolbar/flags/flagsToolbarLogic.ts index 60c1f568f45a6..e1f41cabca73c 100644 --- a/frontend/src/toolbar/flags/flagsToolbarLogic.ts +++ b/frontend/src/toolbar/flags/flagsToolbarLogic.ts @@ -40,11 +40,6 @@ export const flagsToolbarLogic = kea([ `/api/projects/@current/feature_flags/my_flags${encodeParams(params, '?')}` ) - if (response.status >= 400) { - toolbarConfigLogic.actions.tokenExpired() - return [] - } - breakpoint() if (!response.ok) { return [] diff --git a/frontend/src/toolbar/toolbarConfigLogic.ts b/frontend/src/toolbar/toolbarConfigLogic.ts index 1b4638b8f39f8..853b03bdeea32 100644 --- a/frontend/src/toolbar/toolbarConfigLogic.ts +++ b/frontend/src/toolbar/toolbarConfigLogic.ts @@ -119,10 +119,12 @@ export async function toolbarFetch( }) if (response.status === 403) { const responseData = await response.json() - // Do not try to authenticate if the user has no project access altogether - if (responseData.detail !== "You don't have access to the project.") { + if (responseData.detail === "You don't have access to the project.") { toolbarConfigLogic.actions.authenticate() } } + if (response.status == 401) { + toolbarConfigLogic.actions.tokenExpired() + } return response } diff --git a/frontend/src/toolbar/toolbarLogic.ts b/frontend/src/toolbar/toolbarLogic.ts deleted file mode 100644 index d5183a6734f20..0000000000000 --- a/frontend/src/toolbar/toolbarLogic.ts +++ /dev/null @@ -1,87 +0,0 @@ -import { actions, afterMount, kea, listeners, path, props, reducers, selectors } from 'kea' -import { lemonToast } from 'lib/lemon-ui/LemonToast/LemonToast' - -import { actionsTabLogic } from '~/toolbar/actions/actionsTabLogic' -import { posthog } from '~/toolbar/posthog' -import { clearSessionToolbarToken } from '~/toolbar/utils' -import { ToolbarProps } from '~/types' - -import type { toolbarLogicType } from './toolbarLogicType' - -export const toolbarLogic = kea([ - path(['toolbar', 'toolbarLogic']), - props({} as ToolbarProps), - - actions({ - authenticate: true, - logout: true, - tokenExpired: true, - processUserIntent: true, - clearUserIntent: true, - showButton: true, - hideButton: true, - }), - - reducers(({ props }) => ({ - rawApiURL: [props.apiURL as string], - rawJsURL: [(props.jsURL || props.apiURL) as string], - temporaryToken: [props.temporaryToken || null, { logout: () => null, tokenExpired: () => null }], - actionId: [props.actionId || null, { logout: () => null, clearUserIntent: () => null }], - userIntent: [props.userIntent || null, { logout: () => null, clearUserIntent: () => null }], - source: [props.source || null, { logout: () => null }], - buttonVisible: [true, { showButton: () => true, hideButton: () => false, logout: () => false }], - dataAttributes: [props.dataAttributes || []], - posthog: [props.posthog ?? null], - })), - - selectors({ - apiURL: [(s) => [s.rawApiURL], (apiURL) => `${apiURL.endsWith('/') ? apiURL.replace(/\/+$/, '') : apiURL}`], - jsURL: [ - (s) => [s.rawJsURL, s.apiURL], - (rawJsURL, apiUrl) => - `${rawJsURL ? (rawJsURL.endsWith('/') ? 
rawJsURL.replace(/\/+$/, '') : rawJsURL) : apiUrl}`, - ], - isAuthenticated: [(s) => [s.temporaryToken], (temporaryToken) => !!temporaryToken], - }), - - listeners(({ values, props }) => ({ - authenticate: () => { - posthog.capture('toolbar authenticate', { is_authenticated: values.isAuthenticated }) - const encodedUrl = encodeURIComponent(window.location.href) - window.location.href = `${values.apiURL}/authorize_and_redirect/?redirect=${encodedUrl}` - clearSessionToolbarToken() - }, - logout: () => { - posthog.capture('toolbar logout') - clearSessionToolbarToken() - }, - tokenExpired: () => { - posthog.capture('toolbar token expired') - console.warn('PostHog Toolbar API token expired. Clearing session.') - if (values.source !== 'localstorage') { - lemonToast.error('PostHog Toolbar API token expired.') - } - clearSessionToolbarToken() - }, - processUserIntent: () => { - if (props.userIntent === 'add-action' || props.userIntent === 'edit-action') { - actionsTabLogic.actions.showButtonActions() - // the right view will next be opened in `actionsTabLogic` on `getActionsSuccess` - } - }, - })), - - afterMount(({ props, actions, values }) => { - if (props.instrument) { - const distinctId = props.distinctId - if (distinctId) { - posthog.identify(distinctId, props.userEmail ? { email: props.userEmail } : {}) - } - posthog.optIn() - } - if (props.userIntent) { - actions.processUserIntent() - } - posthog.capture('toolbar loaded', { is_authenticated: values.isAuthenticated }) - }), -]) From 7c0258cc0e5a346736fdf4a67801810a49b8d8d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Far=C3=ADas=20Santana?= Date: Fri, 22 Mar 2024 14:20:13 +0100 Subject: [PATCH 13/15] refactor: Support for multiple file formats in S3 batch exports (#20979) * refactor: Support for multiple file formats in batch exports * refactor: Prefer composition over inheritance * refactor: More clearly separate writer from temporary file We now should be more explicit about what is the context in which the batch export temporary file is alive. The writer outlives this context, so it can be used by callers to, for example, check how many records were written. 
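
A minimal sketch of the resulting calling pattern (illustrative, based on the
writer API added in this patch; `flush_to_s3` and `record_iterator` come from
the S3 activity below):

    writer = JSONLBatchExportWriter(max_bytes=1024 * 1024, flush_callable=flush_to_s3)
    async with writer.open_temporary_file():
        for record_batch in record_iterator:
            await writer.write_record_batch(record_batch)
    # The temporary file is closed here, but the writer still reports totals:
    writer.records_total
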
* test: More parquet testing * Update query snapshots * fix: Typing * refactor: Move temporary file to new module * test: Add writer classes tests and docstrings * feat: Add new type aliases and docstrings for FlushCallable * refactor: Get rid of ensure close method * fix: Use proper 'none' compression * refactor: Cover all possible file formats with FILE_FORMAT_EXTENSIONS.keys() * test: Also check if bucket name is set to use S3 * feat: Typing and docstring for get_batch_export_writer --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- posthog/batch_exports/service.py | 1 + .../test/__snapshots__/test_in_cohort.ambr | 8 +- .../temporal/batch_exports/batch_exports.py | 201 ------- .../batch_exports/bigquery_batch_export.py | 4 +- .../batch_exports/http_batch_export.py | 6 +- .../batch_exports/postgres_batch_export.py | 4 +- .../temporal/batch_exports/s3_batch_export.py | 203 +++++-- .../batch_exports/snowflake_batch_export.py | 4 +- .../temporal/batch_exports/temporary_file.py | 528 ++++++++++++++++++ .../tests/batch_exports/test_batch_exports.py | 182 ------ .../test_s3_batch_export_workflow.py | 159 +++++- .../batch_exports/test_temporary_file.py | 389 +++++++++++++ 12 files changed, 1238 insertions(+), 451 deletions(-) create mode 100644 posthog/temporal/batch_exports/temporary_file.py create mode 100644 posthog/temporal/tests/batch_exports/test_temporary_file.py diff --git a/posthog/batch_exports/service.py b/posthog/batch_exports/service.py index b00f0f4c98c69..d51dfdb2fbc3c 100644 --- a/posthog/batch_exports/service.py +++ b/posthog/batch_exports/service.py @@ -90,6 +90,7 @@ class S3BatchExportInputs: kms_key_id: str | None = None batch_export_schema: BatchExportSchema | None = None endpoint_url: str | None = None + file_format: str = "JSONLines" @dataclass diff --git a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr index 9ff7f8ee0ab49..e0f5ea847110d 100644 --- a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr +++ b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr @@ -31,7 +31,7 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS cohort_person_id, 1 AS matched, person_static_cohort.cohort_id AS cohort_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [12]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) + WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [11]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) WHERE and(equals(events.team_id, 420), 1, ifNull(equals(__in_cohort.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -42,7 +42,7 @@ FROM events LEFT JOIN ( SELECT person_id AS cohort_person_id, 1 AS matched, cohort_id FROM static_cohort_people - WHERE in(cohort_id, [12])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) + WHERE in(cohort_id, [11])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) WHERE and(1, equals(__in_cohort.matched, 1)) LIMIT 100 ''' @@ -55,7 +55,7 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS cohort_person_id, 1 AS matched, person_static_cohort.cohort_id AS cohort_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [13]))) AS __in_cohort ON 
equals(__in_cohort.cohort_person_id, events.person_id) + WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [12]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) WHERE and(equals(events.team_id, 420), 1, ifNull(equals(__in_cohort.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -66,7 +66,7 @@ FROM events LEFT JOIN ( SELECT person_id AS cohort_person_id, 1 AS matched, cohort_id FROM static_cohort_people - WHERE in(cohort_id, [13])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) + WHERE in(cohort_id, [12])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) WHERE and(1, equals(__in_cohort.matched, 1)) LIMIT 100 ''' diff --git a/posthog/temporal/batch_exports/batch_exports.py b/posthog/temporal/batch_exports/batch_exports.py index c40950c654426..88cf9e32f274f 100644 --- a/posthog/temporal/batch_exports/batch_exports.py +++ b/posthog/temporal/batch_exports/batch_exports.py @@ -1,15 +1,10 @@ import collections.abc -import csv import dataclasses import datetime as dt -import gzip -import tempfile import typing import uuid from string import Template -import brotli -import orjson import pyarrow as pa from asgiref.sync import sync_to_async from django.conf import settings @@ -286,202 +281,6 @@ def get_data_interval(interval: str, data_interval_end: str | None) -> tuple[dt. return (data_interval_start_dt, data_interval_end_dt) -def json_dumps_bytes(d) -> bytes: - return orjson.dumps(d, default=str) - - -class BatchExportTemporaryFile: - """A TemporaryFile used to as an intermediate step while exporting data. - - This class does not implement the file-like interface but rather passes any calls - to the underlying tempfile.NamedTemporaryFile. We do override 'write' methods - to allow tracking bytes and records. 
- """ - - def __init__( - self, - mode: str = "w+b", - buffering=-1, - compression: str | None = None, - encoding: str | None = None, - newline: str | None = None, - suffix: str | None = None, - prefix: str | None = None, - dir: str | None = None, - *, - errors: str | None = None, - ): - self._file = tempfile.NamedTemporaryFile( - mode=mode, - encoding=encoding, - newline=newline, - buffering=buffering, - suffix=suffix, - prefix=prefix, - dir=dir, - errors=errors, - ) - self.compression = compression - self.bytes_total = 0 - self.records_total = 0 - self.bytes_since_last_reset = 0 - self.records_since_last_reset = 0 - self._brotli_compressor = None - - def __getattr__(self, name): - """Pass get attr to underlying tempfile.NamedTemporaryFile.""" - return self._file.__getattr__(name) - - def __enter__(self): - """Context-manager protocol enter method.""" - self._file.__enter__() - return self - - def __exit__(self, exc, value, tb): - """Context-manager protocol exit method.""" - return self._file.__exit__(exc, value, tb) - - def __iter__(self): - yield from self._file - - @property - def brotli_compressor(self): - if self._brotli_compressor is None: - self._brotli_compressor = brotli.Compressor() - return self._brotli_compressor - - def compress(self, content: bytes | str) -> bytes: - if isinstance(content, str): - encoded = content.encode("utf-8") - else: - encoded = content - - match self.compression: - case "gzip": - return gzip.compress(encoded) - case "brotli": - self.brotli_compressor.process(encoded) - return self.brotli_compressor.flush() - case None: - return encoded - case _: - raise ValueError(f"Unsupported compression: '{self.compression}'") - - def write(self, content: bytes | str): - """Write bytes to underlying file keeping track of how many bytes were written.""" - compressed_content = self.compress(content) - - if "b" in self.mode: - result = self._file.write(compressed_content) - else: - result = self._file.write(compressed_content.decode("utf-8")) - - self.bytes_total += result - self.bytes_since_last_reset += result - - return result - - def write_record_as_bytes(self, record: bytes): - result = self.write(record) - - self.records_total += 1 - self.records_since_last_reset += 1 - - return result - - def write_records_to_jsonl(self, records): - """Write records to a temporary file as JSONL.""" - if len(records) == 1: - jsonl_dump = orjson.dumps(records[0], option=orjson.OPT_APPEND_NEWLINE, default=str) - else: - jsonl_dump = b"\n".join(map(json_dumps_bytes, records)) - - result = self.write(jsonl_dump) - - self.records_total += len(records) - self.records_since_last_reset += len(records) - - return result - - def write_records_to_csv( - self, - records, - fieldnames: None | collections.abc.Sequence[str] = None, - extrasaction: typing.Literal["raise", "ignore"] = "ignore", - delimiter: str = ",", - quotechar: str = '"', - escapechar: str | None = "\\", - lineterminator: str = "\n", - quoting=csv.QUOTE_NONE, - ): - """Write records to a temporary file as CSV.""" - if len(records) == 0: - return - - if fieldnames is None: - fieldnames = list(records[0].keys()) - - writer = csv.DictWriter( - self, - fieldnames=fieldnames, - extrasaction=extrasaction, - delimiter=delimiter, - quotechar=quotechar, - escapechar=escapechar, - quoting=quoting, - lineterminator=lineterminator, - ) - writer.writerows(records) - - self.records_total += len(records) - self.records_since_last_reset += len(records) - - def write_records_to_tsv( - self, - records, - fieldnames: None | list[str] = None, - 
extrasaction: typing.Literal["raise", "ignore"] = "ignore", - quotechar: str = '"', - escapechar: str | None = "\\", - lineterminator: str = "\n", - quoting=csv.QUOTE_NONE, - ): - """Write records to a temporary file as TSV.""" - return self.write_records_to_csv( - records, - fieldnames=fieldnames, - extrasaction=extrasaction, - delimiter="\t", - quotechar=quotechar, - escapechar=escapechar, - quoting=quoting, - lineterminator=lineterminator, - ) - - def rewind(self): - """Rewind the file before reading it.""" - if self.compression == "brotli": - result = self._file.write(self.brotli_compressor.finish()) - - self.bytes_total += result - self.bytes_since_last_reset += result - - self._brotli_compressor = None - - self._file.seek(0) - - def reset(self): - """Reset underlying file by truncating it. - - Also resets the tracker attributes for bytes and records since last reset. - """ - self._file.seek(0) - self._file.truncate() - - self.bytes_since_last_reset = 0 - self.records_since_last_reset = 0 - - @dataclasses.dataclass class CreateBatchExportRunInputs: """Inputs to the create_export_run activity. diff --git a/posthog/temporal/batch_exports/bigquery_batch_export.py b/posthog/temporal/batch_exports/bigquery_batch_export.py index a0469de79bb9e..b754a7add16b4 100644 --- a/posthog/temporal/batch_exports/bigquery_batch_export.py +++ b/posthog/temporal/batch_exports/bigquery_batch_export.py @@ -15,7 +15,6 @@ from posthog.batch_exports.service import BatchExportField, BatchExportSchema, BigQueryBatchExportInputs from posthog.temporal.batch_exports.base import PostHogWorkflow from posthog.temporal.batch_exports.batch_exports import ( - BatchExportTemporaryFile, CreateBatchExportRunInputs, UpdateBatchExportRunStatusInputs, create_export_run, @@ -29,6 +28,9 @@ get_bytes_exported_metric, get_rows_exported_metric, ) +from posthog.temporal.batch_exports.temporary_file import ( + BatchExportTemporaryFile, +) from posthog.temporal.batch_exports.utils import peek_first_and_rewind from posthog.temporal.common.clickhouse import get_client from posthog.temporal.common.logger import bind_temporal_worker_logger diff --git a/posthog/temporal/batch_exports/http_batch_export.py b/posthog/temporal/batch_exports/http_batch_export.py index 8aca65c80ff38..2866d50c99876 100644 --- a/posthog/temporal/batch_exports/http_batch_export.py +++ b/posthog/temporal/batch_exports/http_batch_export.py @@ -13,7 +13,6 @@ from posthog.models import BatchExportRun from posthog.temporal.batch_exports.base import PostHogWorkflow from posthog.temporal.batch_exports.batch_exports import ( - BatchExportTemporaryFile, CreateBatchExportRunInputs, UpdateBatchExportRunStatusInputs, create_export_run, @@ -21,12 +20,15 @@ get_data_interval, get_rows_count, iter_records, - json_dumps_bytes, ) from posthog.temporal.batch_exports.metrics import ( get_bytes_exported_metric, get_rows_exported_metric, ) +from posthog.temporal.batch_exports.temporary_file import ( + BatchExportTemporaryFile, + json_dumps_bytes, +) from posthog.temporal.common.clickhouse import get_client from posthog.temporal.common.logger import bind_temporal_worker_logger diff --git a/posthog/temporal/batch_exports/postgres_batch_export.py b/posthog/temporal/batch_exports/postgres_batch_export.py index 5dbfc6faa4acf..98969ee78de79 100644 --- a/posthog/temporal/batch_exports/postgres_batch_export.py +++ b/posthog/temporal/batch_exports/postgres_batch_export.py @@ -17,7 +17,6 @@ from posthog.batch_exports.service import BatchExportField, BatchExportSchema, PostgresBatchExportInputs 
 from posthog.temporal.batch_exports.base import PostHogWorkflow
 from posthog.temporal.batch_exports.batch_exports import (
-    BatchExportTemporaryFile,
     CreateBatchExportRunInputs,
     UpdateBatchExportRunStatusInputs,
     create_export_run,
@@ -31,6 +30,9 @@
     get_bytes_exported_metric,
     get_rows_exported_metric,
 )
+from posthog.temporal.batch_exports.temporary_file import (
+    BatchExportTemporaryFile,
+)
 from posthog.temporal.batch_exports.utils import peek_first_and_rewind
 from posthog.temporal.common.clickhouse import get_client
 from posthog.temporal.common.logger import bind_temporal_worker_logger
diff --git a/posthog/temporal/batch_exports/s3_batch_export.py b/posthog/temporal/batch_exports/s3_batch_export.py
index 4d99cbeffd7c3..e83fe3f12915d 100644
--- a/posthog/temporal/batch_exports/s3_batch_export.py
+++ b/posthog/temporal/batch_exports/s3_batch_export.py
@@ -1,4 +1,5 @@
 import asyncio
+import collections.abc
 import contextlib
 import datetime as dt
 import io
@@ -8,6 +9,8 @@
 from dataclasses import dataclass
 
 import aioboto3
+import orjson
+import pyarrow as pa
 from django.conf import settings
 from temporalio import activity, workflow
 from temporalio.common import RetryPolicy
@@ -16,7 +19,6 @@
 from posthog.batch_exports.service import BatchExportField, BatchExportSchema, S3BatchExportInputs
 from posthog.temporal.batch_exports.base import PostHogWorkflow
 from posthog.temporal.batch_exports.batch_exports import (
-    BatchExportTemporaryFile,
     CreateBatchExportRunInputs,
     UpdateBatchExportRunStatusInputs,
     create_export_run,
@@ -30,6 +32,15 @@
     get_bytes_exported_metric,
     get_rows_exported_metric,
 )
+from posthog.temporal.batch_exports.temporary_file import (
+    BatchExportTemporaryFile,
+    BatchExportWriter,
+    FlushCallable,
+    JSONLBatchExportWriter,
+    ParquetBatchExportWriter,
+    UnsupportedFileFormatError,
+)
+from posthog.temporal.batch_exports.utils import peek_first_and_rewind
 from posthog.temporal.common.clickhouse import get_client
 from posthog.temporal.common.logger import bind_temporal_worker_logger
 
@@ -50,19 +61,31 @@ def get_allowed_template_variables(inputs) -> dict[str, str]:
     }
 
 
+FILE_FORMAT_EXTENSIONS = {
+    "Parquet": "parquet",
+    "JSONLines": "jsonl",
+}
+
+COMPRESSION_EXTENSIONS = {
+    "gzip": "gz",
+    "snappy": "sz",
+    "brotli": "br",
+    "zstd": "zst",
+    "lz4": "lz4",
+}
+
+
 def get_s3_key(inputs) -> str:
     """Return an S3 key given S3InsertInputs."""
     template_variables = get_allowed_template_variables(inputs)
     key_prefix = inputs.prefix.format(**template_variables)
+    file_extension = FILE_FORMAT_EXTENSIONS[inputs.file_format]
 
     base_file_name = f"{inputs.data_interval_start}-{inputs.data_interval_end}"
-    match inputs.compression:
-        case "gzip":
-            file_name = base_file_name + ".jsonl.gz"
-        case "brotli":
-            file_name = base_file_name + ".jsonl.br"
-        case _:
-            file_name = base_file_name + ".jsonl"
+    if inputs.compression is not None:
+        file_name = base_file_name + f".{file_extension}.{COMPRESSION_EXTENSIONS[inputs.compression]}"
+    else:
+        file_name = base_file_name + f".{file_extension}"
 
     key = posixpath.join(key_prefix, file_name)
 
@@ -311,6 +334,8 @@ class S3InsertInputs:
     kms_key_id: str | None = None
     batch_export_schema: BatchExportSchema | None = None
     endpoint_url: str | None = None
+    # TODO: In Python 3.11, this could be an enum.StrEnum.
+    file_format: str = "JSONLines"
 
 
 async def initialize_and_resume_multipart_upload(inputs: S3InsertInputs) -> tuple[S3MultiPartUpload, str]:
@@ -451,7 +476,7 @@ async def insert_into_s3_activity(inputs: S3InsertInputs) -> int:
 
     last_uploaded_part_timestamp: str | None = None
 
-    async def worker_shutdown_handler():
+    async def worker_shutdown_handler() -> None:
         """Handle the Worker shutting down by heart-beating our latest status."""
         await activity.wait_for_worker_shutdown()
         logger.warn(
@@ -466,50 +491,147 @@ async def worker_shutdown_handler():
 
     asyncio.create_task(worker_shutdown_handler())
 
-    record = None
-
     async with s3_upload as s3_upload:
-        with BatchExportTemporaryFile(compression=inputs.compression) as local_results_file:
+
+        async def flush_to_s3(
+            local_results_file,
+            records_since_last_flush: int,
+            bytes_since_last_flush: int,
+            last_inserted_at: dt.datetime,
+            last: bool,
+        ):
+            nonlocal last_uploaded_part_timestamp
+
+            logger.debug(
+                "Uploading %s part %s containing %s records with size %s bytes",
+                "last " if last else "",
+                s3_upload.part_number + 1,
+                records_since_last_flush,
+                bytes_since_last_flush,
+            )
+
+            await s3_upload.upload_part(local_results_file)
+            rows_exported.add(records_since_last_flush)
+            bytes_exported.add(bytes_since_last_flush)
+
+            last_uploaded_part_timestamp = str(last_inserted_at)
+            activity.heartbeat(last_uploaded_part_timestamp, s3_upload.to_state())
+
+        first_record_batch, record_iterator = peek_first_and_rewind(record_iterator)
+        first_record_batch = cast_record_batch_json_columns(first_record_batch)
+        column_names = first_record_batch.column_names
+        column_names.pop(column_names.index("_inserted_at"))
+
+        schema = pa.schema(
+            # NOTE: For some reason, some batches set fields as non-nullable, whereas other
+            # record batches have them as nullable.
+            # Until we figure it out, we set all fields to nullable. There are some fields we know
+            # are not nullable, but I'm opting for the more flexible option until we figure out why
+            # schemas differ between batches.
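+            # (`Field.with_nullable(True)` returns a copy of each field with
+            # nullability relaxed; field names and types are unchanged.)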
+            [field.with_nullable(True) for field in first_record_batch.select(column_names).schema]
+        )
+
+        writer = get_batch_export_writer(
+            inputs,
+            flush_callable=flush_to_s3,
+            max_bytes=settings.BATCH_EXPORT_S3_UPLOAD_CHUNK_SIZE_BYTES,
+            schema=schema,
+        )
+
+        async with writer.open_temporary_file():
             rows_exported = get_rows_exported_metric()
             bytes_exported = get_bytes_exported_metric()
 
-            async def flush_to_s3(last_uploaded_part_timestamp: str, last=False):
-                logger.debug(
-                    "Uploading %s part %s containing %s records with size %s bytes",
-                    "last " if last else "",
-                    s3_upload.part_number + 1,
-                    local_results_file.records_since_last_reset,
-                    local_results_file.bytes_since_last_reset,
-                )
+            for record_batch in record_iterator:
+                record_batch = cast_record_batch_json_columns(record_batch)
 
-                await s3_upload.upload_part(local_results_file)
-                rows_exported.add(local_results_file.records_since_last_reset)
-                bytes_exported.add(local_results_file.bytes_since_last_reset)
+                await writer.write_record_batch(record_batch)
 
-                activity.heartbeat(last_uploaded_part_timestamp, s3_upload.to_state())
+        await s3_upload.complete()
 
-            for record_batch in record_iterator:
-                for record in record_batch.to_pylist():
-                    for json_column in ("properties", "person_properties", "set", "set_once"):
-                        if (json_str := record.get(json_column, None)) is not None:
-                            record[json_column] = json.loads(json_str)
+    return writer.records_total
 
-                    inserted_at = record.pop("_inserted_at")
-                    local_results_file.write_records_to_jsonl([record])
 
+def get_batch_export_writer(
+    inputs: S3InsertInputs, flush_callable: FlushCallable, max_bytes: int, schema: pa.Schema | None = None
+) -> BatchExportWriter:
+    """Return the `BatchExportWriter` corresponding to configured `file_format`.
 
-                    if local_results_file.tell() > settings.BATCH_EXPORT_S3_UPLOAD_CHUNK_SIZE_BYTES:
-                        last_uploaded_part_timestamp = str(inserted_at)
-                        await flush_to_s3(last_uploaded_part_timestamp)
-                        local_results_file.reset()
+    Raises:
+        UnsupportedFileFormatError: If no writer exists for the given `file_format`.
+    """
+    writer: BatchExportWriter
 
-            if local_results_file.tell() > 0 and record is not None:
-                last_uploaded_part_timestamp = str(inserted_at)
-                await flush_to_s3(last_uploaded_part_timestamp, last=True)
+    if inputs.file_format == "Parquet":
+        writer = ParquetBatchExportWriter(
+            max_bytes=max_bytes,
+            flush_callable=flush_callable,
+            compression=inputs.compression,
+            schema=schema,
+        )
+    elif inputs.file_format == "JSONLines":
+        writer = JSONLBatchExportWriter(
+            max_bytes=max_bytes,
+            flush_callable=flush_callable,
+            compression=inputs.compression,
+        )
+    else:
+        raise UnsupportedFileFormatError(inputs.file_format, "S3")
 
-            await s3_upload.complete()
+    return writer
+
+
+def cast_record_batch_json_columns(
+    record_batch: pa.RecordBatch,
+    json_columns: collections.abc.Sequence = ("properties", "person_properties", "set", "set_once"),
+) -> pa.RecordBatch:
+    """Cast json_columns in record_batch to JsonType.
+
+    We return a new RecordBatch with any json_columns replaced by fields cast to JsonType.
+    Casting does not copy the underlying array buffers, so memory usage does not increase when creating
+    the new array or the new record batch.
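+
+    A sketch of the effect (hypothetical single-column batch; `_inserted_at` elided):
+
+        batch = pa.RecordBatch.from_pydict({"properties": ['{"$os": "Linux"}']})
+        cast_record_batch_json_columns(batch).column("properties")[0].as_py()
+        # -> {"$os": "Linux"}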
+ """ + column_names = set(record_batch.column_names) + intersection = column_names & set(json_columns) + + casted_arrays = [] + for array in record_batch.select(intersection): + if pa.types.is_string(array.type): + casted_array = array.cast(JsonType()) + casted_arrays.append(casted_array) + + remaining_column_names = list(column_names - intersection) + return pa.RecordBatch.from_arrays( + record_batch.select(remaining_column_names).columns + casted_arrays, + names=remaining_column_names + list(intersection), + ) + + +class JsonScalar(pa.ExtensionScalar): + """Represents a JSON binary string.""" + + def as_py(self) -> dict | None: + if self.value: + return orjson.loads(self.value.as_py().encode("utf-8")) + else: + return None + + +class JsonType(pa.ExtensionType): + """Type for JSON binary strings.""" + + def __init__(self): + super().__init__(pa.string(), "json") + + def __arrow_ext_serialize__(self): + return b"" + + @classmethod + def __arrow_ext_deserialize__(self, storage_type, serialized): + return JsonType() - return local_results_file.records_total + def __arrow_ext_scalar_class__(self): + return JsonScalar @workflow.defn(name="s3-export") @@ -572,6 +694,7 @@ async def run(self, inputs: S3BatchExportInputs): encryption=inputs.encryption, kms_key_id=inputs.kms_key_id, batch_export_schema=inputs.batch_export_schema, + file_format=inputs.file_format, ) await execute_batch_export_insert_activity( diff --git a/posthog/temporal/batch_exports/snowflake_batch_export.py b/posthog/temporal/batch_exports/snowflake_batch_export.py index be94eca89a799..9053f3e1006ad 100644 --- a/posthog/temporal/batch_exports/snowflake_batch_export.py +++ b/posthog/temporal/batch_exports/snowflake_batch_export.py @@ -18,7 +18,6 @@ from posthog.batch_exports.service import BatchExportField, BatchExportSchema, SnowflakeBatchExportInputs from posthog.temporal.batch_exports.base import PostHogWorkflow from posthog.temporal.batch_exports.batch_exports import ( - BatchExportTemporaryFile, CreateBatchExportRunInputs, UpdateBatchExportRunStatusInputs, create_export_run, @@ -32,6 +31,9 @@ get_bytes_exported_metric, get_rows_exported_metric, ) +from posthog.temporal.batch_exports.temporary_file import ( + BatchExportTemporaryFile, +) from posthog.temporal.batch_exports.utils import peek_first_and_rewind from posthog.temporal.common.clickhouse import get_client from posthog.temporal.common.logger import bind_temporal_worker_logger diff --git a/posthog/temporal/batch_exports/temporary_file.py b/posthog/temporal/batch_exports/temporary_file.py new file mode 100644 index 0000000000000..f955f45553727 --- /dev/null +++ b/posthog/temporal/batch_exports/temporary_file.py @@ -0,0 +1,528 @@ +"""This module contains a temporary file to stage data in batch exports.""" +import abc +import collections.abc +import contextlib +import csv +import datetime as dt +import gzip +import tempfile +import typing + +import brotli +import orjson +import pyarrow as pa +import pyarrow.parquet as pq + + +def json_dumps_bytes(d) -> bytes: + return orjson.dumps(d, default=str) + + +class BatchExportTemporaryFile: + """A TemporaryFile used to as an intermediate step while exporting data. + + This class does not implement the file-like interface but rather passes any calls + to the underlying tempfile.NamedTemporaryFile. We do override 'write' methods + to allow tracking bytes and records. 
+ """ + + def __init__( + self, + mode: str = "w+b", + buffering=-1, + compression: str | None = None, + encoding: str | None = None, + newline: str | None = None, + suffix: str | None = None, + prefix: str | None = None, + dir: str | None = None, + *, + errors: str | None = None, + ): + self._file = tempfile.NamedTemporaryFile( + mode=mode, + encoding=encoding, + newline=newline, + buffering=buffering, + suffix=suffix, + prefix=prefix, + dir=dir, + errors=errors, + ) + self.compression = compression + self.bytes_total = 0 + self.records_total = 0 + self.bytes_since_last_reset = 0 + self.records_since_last_reset = 0 + self._brotli_compressor = None + + def __getattr__(self, name): + """Pass get attr to underlying tempfile.NamedTemporaryFile.""" + return self._file.__getattr__(name) + + def __enter__(self): + """Context-manager protocol enter method.""" + self._file.__enter__() + return self + + def __exit__(self, exc, value, tb): + """Context-manager protocol exit method.""" + return self._file.__exit__(exc, value, tb) + + def __iter__(self): + yield from self._file + + @property + def brotli_compressor(self): + if self._brotli_compressor is None: + self._brotli_compressor = brotli.Compressor() + return self._brotli_compressor + + def finish_brotli_compressor(self): + """Flush remaining brotli bytes.""" + # TODO: Move compression out of `BatchExportTemporaryFile` to a standard class for all writers. + if self.compression != "brotli": + raise ValueError(f"Compression is '{self.compression}', not 'brotli'") + + result = self._file.write(self.brotli_compressor.finish()) + self.bytes_total += result + self.bytes_since_last_reset += result + self._brotli_compressor = None + + def compress(self, content: bytes | str) -> bytes: + if isinstance(content, str): + encoded = content.encode("utf-8") + else: + encoded = content + + match self.compression: + case "gzip": + return gzip.compress(encoded) + case "brotli": + self.brotli_compressor.process(encoded) + return self.brotli_compressor.flush() + case None: + return encoded + case _: + raise ValueError(f"Unsupported compression: '{self.compression}'") + + def write(self, content: bytes | str): + """Write bytes to underlying file keeping track of how many bytes were written.""" + compressed_content = self.compress(content) + + if "b" in self.mode: + result = self._file.write(compressed_content) + else: + result = self._file.write(compressed_content.decode("utf-8")) + + self.bytes_total += result + self.bytes_since_last_reset += result + + return result + + def write_record_as_bytes(self, record: bytes): + result = self.write(record) + + self.records_total += 1 + self.records_since_last_reset += 1 + + return result + + def write_records_to_jsonl(self, records): + """Write records to a temporary file as JSONL.""" + if len(records) == 1: + jsonl_dump = orjson.dumps(records[0], option=orjson.OPT_APPEND_NEWLINE, default=str) + else: + jsonl_dump = b"\n".join(map(json_dumps_bytes, records)) + + result = self.write(jsonl_dump) + + self.records_total += len(records) + self.records_since_last_reset += len(records) + + return result + + def write_records_to_csv( + self, + records, + fieldnames: None | collections.abc.Sequence[str] = None, + extrasaction: typing.Literal["raise", "ignore"] = "ignore", + delimiter: str = ",", + quotechar: str = '"', + escapechar: str | None = "\\", + lineterminator: str = "\n", + quoting=csv.QUOTE_NONE, + ): + """Write records to a temporary file as CSV.""" + if len(records) == 0: + return + + if fieldnames is None: + fieldnames 
= list(records[0].keys())
+
+        writer = csv.DictWriter(
+            self,
+            fieldnames=fieldnames,
+            extrasaction=extrasaction,
+            delimiter=delimiter,
+            quotechar=quotechar,
+            escapechar=escapechar,
+            quoting=quoting,
+            lineterminator=lineterminator,
+        )
+        writer.writerows(records)
+
+        self.records_total += len(records)
+        self.records_since_last_reset += len(records)
+
+    def write_records_to_tsv(
+        self,
+        records,
+        fieldnames: None | list[str] = None,
+        extrasaction: typing.Literal["raise", "ignore"] = "ignore",
+        quotechar: str = '"',
+        escapechar: str | None = "\\",
+        lineterminator: str = "\n",
+        quoting=csv.QUOTE_NONE,
+    ):
+        """Write records to a temporary file as TSV."""
+        return self.write_records_to_csv(
+            records,
+            fieldnames=fieldnames,
+            extrasaction=extrasaction,
+            delimiter="\t",
+            quotechar=quotechar,
+            escapechar=escapechar,
+            quoting=quoting,
+            lineterminator=lineterminator,
+        )
+
+    def rewind(self):
+        """Rewind the file before reading it."""
+        self._file.seek(0)
+
+    def reset(self):
+        """Reset underlying file by truncating it.
+
+        Also resets the tracker attributes for bytes and records since last reset.
+        """
+        self._file.seek(0)
+        self._file.truncate()
+
+        self.bytes_since_last_reset = 0
+        self.records_since_last_reset = 0
+
+
+LastInsertedAt = dt.datetime
+IsLast = bool
+RecordsSinceLastFlush = int
+BytesSinceLastFlush = int
+FlushCallable = collections.abc.Callable[
+    [BatchExportTemporaryFile, RecordsSinceLastFlush, BytesSinceLastFlush, LastInsertedAt, IsLast],
+    collections.abc.Awaitable[None],
+]
+
+
+class UnsupportedFileFormatError(Exception):
+    """Raised when a writer for an unsupported file format is requested."""
+
+    def __init__(self, file_format: str, destination: str):
+        super().__init__(f"{file_format} is not a supported format for {destination} batch exports.")
+
+
+class BatchExportWriter(abc.ABC):
+    """A temporary file writer to be used by batch export workflows.
+
+    Subclasses should define `_write_record_batch` with the particular intricacies
+    of the format they are writing.
+
+    Actual writing calls are passed to the underlying `batch_export_file`.
+
+    Attributes:
+        _batch_export_file: The temporary file we are writing to.
+        max_bytes: Flush the temporary file with the provided `flush_callable`
+            upon reaching or surpassing this threshold. Keep in mind we write on a RecordBatch
+            per RecordBatch basis, which means the threshold will be surpassed by at most the
+            size of a RecordBatch before a flush occurs.
+        flush_callable: A callback to flush the temporary file when `max_bytes` is reached.
+            The temporary file will be reset after calling `flush_callable`. When calling
+            `flush_callable` the following positional arguments will be passed: The temporary file
+            that must be flushed, the number of records since the last flush, the number of bytes
+            since the last flush, the latest recorded `_inserted_at`, and a `bool` indicating if
+            this is the last flush (when exiting the context manager).
+        file_kwargs: Optional keyword arguments passed when initializing `_batch_export_file`.
+        last_inserted_at: Latest `_inserted_at` written. This attribute leaks some implementation
+            details, as we assume `_inserted_at` is present, since it's added to all
+            batch export queries.
+        records_total: The total number of records (not RecordBatches!) written.
+        records_since_last_flush: The number of records written since last flush.
+        bytes_total: The total number of bytes written.
+        bytes_since_last_flush: The number of bytes written since last flush.
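+
+    A sketch of the expected usage, assuming a caller-provided coroutine matching
+    `FlushCallable` (the `upload_part` call is hypothetical) and a `record_batch`
+    that includes an `_inserted_at` column:
+
+        async def flush(file, records, bytes_, last_inserted_at, is_last):
+            await upload_part(file)
+
+        writer = JSONLBatchExportWriter(max_bytes=1024 * 1024, flush_callable=flush)
+        async with writer.open_temporary_file():
+            await writer.write_record_batch(record_batch)
+        writer.records_total  # Still available after the temporary file is closed.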
+ """ + + def __init__( + self, + flush_callable: FlushCallable, + max_bytes: int, + file_kwargs: collections.abc.Mapping[str, typing.Any] | None = None, + ): + self.flush_callable = flush_callable + self.max_bytes = max_bytes + self.file_kwargs: collections.abc.Mapping[str, typing.Any] = file_kwargs or {} + + self._batch_export_file: BatchExportTemporaryFile | None = None + self.reset_writer_tracking() + + def reset_writer_tracking(self): + """Reset this writer's tracking state.""" + self.last_inserted_at: dt.datetime | None = None + self.records_total = 0 + self.records_since_last_flush = 0 + self.bytes_total = 0 + self.bytes_since_last_flush = 0 + + @contextlib.asynccontextmanager + async def open_temporary_file(self): + """Explicitly open the temporary file this writer is writing to. + + The underlying `BatchExportTemporaryFile` is only accessible within this context manager. This helps + us separate the lifetime of the underlying temporary file from the writer: The writer may still be + accessed even after the temporary file is closed, while on the other hand we ensure the file and all + its data is flushed and not leaked outside the context. Any relevant tracking information is copied + to the writer. + """ + self.reset_writer_tracking() + + with BatchExportTemporaryFile(**self.file_kwargs) as temp_file: + self._batch_export_file = temp_file + + try: + yield + finally: + self.track_bytes_written(temp_file) + + if self.last_inserted_at is not None and self.bytes_since_last_flush > 0: + # `bytes_since_last_flush` should be 0 unless: + # 1. The last batch wasn't flushed as it didn't reach `max_bytes`. + # 2. The last batch was flushed but there was another write after the last call to + # `write_record_batch`. For example, footer bytes. + await self.flush(self.last_inserted_at, is_last=True) + + self._batch_export_file = None + + @property + def batch_export_file(self): + """Property for underlying temporary file. + + Raises: + ValueError: if attempting to access the temporary file before it has been opened. + """ + if self._batch_export_file is None: + raise ValueError("Batch export file is closed. Did you forget to call 'open_temporary_file'?") + return self._batch_export_file + + @abc.abstractmethod + def _write_record_batch(self, record_batch: pa.RecordBatch) -> None: + """Write a record batch to the underlying `BatchExportTemporaryFile`. + + Subclasses must override this to provide the actual implementation according to the supported + file format. 
+ """ + pass + + def track_records_written(self, record_batch: pa.RecordBatch) -> None: + """Update this writer's state with the number of records in `record_batch`.""" + self.records_total += record_batch.num_rows + self.records_since_last_flush += record_batch.num_rows + + def track_bytes_written(self, batch_export_file: BatchExportTemporaryFile) -> None: + """Update this writer's state with the bytes in `batch_export_file`.""" + self.bytes_total = batch_export_file.bytes_total + self.bytes_since_last_flush = batch_export_file.bytes_since_last_reset + + async def write_record_batch(self, record_batch: pa.RecordBatch) -> None: + """Issue a record batch write tracking progress and flushing if required.""" + record_batch = record_batch.sort_by("_inserted_at") + last_inserted_at = record_batch.column("_inserted_at")[-1].as_py() + + column_names = record_batch.column_names + column_names.pop(column_names.index("_inserted_at")) + + self._write_record_batch(record_batch.select(column_names)) + + self.last_inserted_at = last_inserted_at + self.track_records_written(record_batch) + self.track_bytes_written(self.batch_export_file) + + if self.bytes_since_last_flush >= self.max_bytes: + await self.flush(last_inserted_at) + + async def flush(self, last_inserted_at: dt.datetime, is_last: bool = False) -> None: + """Call the provided `flush_callable` and reset underlying file. + + The underlying batch export temporary file will be reset after calling `flush_callable`. + """ + if is_last is True and self.batch_export_file.compression == "brotli": + self.batch_export_file.finish_brotli_compressor() + + self.batch_export_file.seek(0) + + await self.flush_callable( + self.batch_export_file, + self.records_since_last_flush, + self.bytes_since_last_flush, + last_inserted_at, + is_last, + ) + self.batch_export_file.reset() + + self.records_since_last_flush = 0 + self.bytes_since_last_flush = 0 + + +class JSONLBatchExportWriter(BatchExportWriter): + """A `BatchExportWriter` for JSONLines format. + + Attributes: + default: The default function to use to cast non-serializable Python objects to serializable objects. + By default, non-serializable objects will be cast to string via `str()`. 
+ """ + + def __init__( + self, + max_bytes: int, + flush_callable: FlushCallable, + compression: None | str = None, + default: typing.Callable = str, + ): + super().__init__( + max_bytes=max_bytes, + flush_callable=flush_callable, + file_kwargs={"compression": compression}, + ) + + self.default = default + + def write(self, content: bytes) -> int: + """Write a single row of JSONL.""" + n = self.batch_export_file.write(orjson.dumps(content, default=str) + b"\n") + return n + + def _write_record_batch(self, record_batch: pa.RecordBatch) -> None: + """Write records to a temporary file as JSONL.""" + for record in record_batch.to_pylist(): + self.write(record) + + +class CSVBatchExportWriter(BatchExportWriter): + """A `BatchExportWriter` for CSV format.""" + + def __init__( + self, + max_bytes: int, + flush_callable: FlushCallable, + field_names: collections.abc.Sequence[str], + extras_action: typing.Literal["raise", "ignore"] = "ignore", + delimiter: str = ",", + quote_char: str = '"', + escape_char: str | None = "\\", + line_terminator: str = "\n", + quoting=csv.QUOTE_NONE, + compression: str | None = None, + ): + super().__init__( + max_bytes=max_bytes, + flush_callable=flush_callable, + file_kwargs={"compression": compression}, + ) + self.field_names = field_names + self.extras_action: typing.Literal["raise", "ignore"] = extras_action + self.delimiter = delimiter + self.quote_char = quote_char + self.escape_char = escape_char + self.line_terminator = line_terminator + self.quoting = quoting + + self._csv_writer: csv.DictWriter | None = None + + @property + def csv_writer(self) -> csv.DictWriter: + if self._csv_writer is None: + self._csv_writer = csv.DictWriter( + self.batch_export_file, + fieldnames=self.field_names, + extrasaction=self.extras_action, + delimiter=self.delimiter, + quotechar=self.quote_char, + escapechar=self.escape_char, + quoting=self.quoting, + lineterminator=self.line_terminator, + ) + + return self._csv_writer + + def _write_record_batch(self, record_batch: pa.RecordBatch) -> None: + """Write records to a temporary file as CSV.""" + self.csv_writer.writerows(record_batch.to_pylist()) + + +class ParquetBatchExportWriter(BatchExportWriter): + """A `BatchExportWriter` for Apache Parquet format. + + We utilize and wrap a `pyarrow.parquet.ParquetWriter` to do the actual writing. We default to their + defaults for most parameters; however this class could be extended with more attributes to pass along + to `pyarrow.parquet.ParquetWriter`. + + See the pyarrow docs for more details on what parameters can the writer be configured with: + https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html + + In contrast to other writers, instead of us handling compression we let `pyarrow.parquet.ParquetWriter` + handle it, so `BatchExportTemporaryFile` is always initialized with `compression=None`. + + Attributes: + schema: The schema used by the Parquet file. Should match the schema of written RecordBatches. + compression: Compression codec passed to underlying `pyarrow.parquet.ParquetWriter`. 
+ """ + + def __init__( + self, + max_bytes: int, + flush_callable: FlushCallable, + schema: pa.Schema, + compression: str | None = "snappy", + ): + super().__init__( + max_bytes=max_bytes, + flush_callable=flush_callable, + file_kwargs={"compression": None}, # ParquetWriter handles compression + ) + self.schema = schema + self.compression = compression + + self._parquet_writer: pq.ParquetWriter | None = None + + @property + def parquet_writer(self) -> pq.ParquetWriter: + if self._parquet_writer is None: + self._parquet_writer = pq.ParquetWriter( + self.batch_export_file, + schema=self.schema, + compression="none" if self.compression is None else self.compression, + ) + return self._parquet_writer + + @contextlib.asynccontextmanager + async def open_temporary_file(self): + """Ensure underlying Parquet writer is closed before flushing and closing temporary file.""" + async with super().open_temporary_file(): + try: + yield + finally: + if self._parquet_writer is not None: + self._parquet_writer.writer.close() + self._parquet_writer = None + + def _write_record_batch(self, record_batch: pa.RecordBatch) -> None: + """Write records to a temporary file as Parquet.""" + + self.parquet_writer.write_batch(record_batch.select(self.parquet_writer.schema.names)) diff --git a/posthog/temporal/tests/batch_exports/test_batch_exports.py b/posthog/temporal/tests/batch_exports/test_batch_exports.py index 0afbfcabb71cb..756c07e442e4f 100644 --- a/posthog/temporal/tests/batch_exports/test_batch_exports.py +++ b/posthog/temporal/tests/batch_exports/test_batch_exports.py @@ -1,6 +1,4 @@ -import csv import datetime as dt -import io import json import operator from random import randint @@ -9,11 +7,9 @@ from django.test import override_settings from posthog.temporal.batch_exports.batch_exports import ( - BatchExportTemporaryFile, get_data_interval, get_rows_count, iter_records, - json_dumps_bytes, ) from posthog.temporal.tests.utils.events import generate_test_events_in_clickhouse @@ -558,181 +554,3 @@ def test_get_data_interval(interval, data_interval_end, expected): """Test get_data_interval returns the expected data interval tuple.""" result = get_data_interval(interval, data_interval_end) assert result == expected - - -@pytest.mark.parametrize( - "to_write", - [ - (b"",), - (b"", b""), - (b"12345",), - (b"12345", b"12345"), - (b"abbcccddddeeeee",), - (b"abbcccddddeeeee", b"abbcccddddeeeee"), - ], -) -def test_batch_export_temporary_file_tracks_bytes(to_write): - """Test the bytes written by BatchExportTemporaryFile match expected.""" - with BatchExportTemporaryFile() as be_file: - for content in to_write: - be_file.write(content) - - assert be_file.bytes_total == sum(len(content) for content in to_write) - assert be_file.bytes_since_last_reset == sum(len(content) for content in to_write) - - be_file.reset() - - assert be_file.bytes_total == sum(len(content) for content in to_write) - assert be_file.bytes_since_last_reset == 0 - - -TEST_RECORDS = [ - [], - [ - {"id": "record-1", "property": "value", "property_int": 1}, - {"id": "record-2", "property": "another-value", "property_int": 2}, - { - "id": "record-3", - "property": {"id": "nested-record", "property": "nested-value"}, - "property_int": 3, - }, - ], -] - - -@pytest.mark.parametrize( - "records", - TEST_RECORDS, -) -def test_batch_export_temporary_file_write_records_to_jsonl(records): - """Test JSONL records written by BatchExportTemporaryFile match expected.""" - jsonl_dump = b"\n".join(map(json_dumps_bytes, records)) - - with 
BatchExportTemporaryFile() as be_file: - be_file.write_records_to_jsonl(records) - - assert be_file.bytes_total == len(jsonl_dump) - assert be_file.bytes_since_last_reset == len(jsonl_dump) - assert be_file.records_total == len(records) - assert be_file.records_since_last_reset == len(records) - - be_file.seek(0) - lines = be_file.readlines() - assert len(lines) == len(records) - - for line_index, jsonl_record in enumerate(lines): - json_loaded = json.loads(jsonl_record) - assert json_loaded == records[line_index] - - be_file.reset() - - assert be_file.bytes_total == len(jsonl_dump) - assert be_file.bytes_since_last_reset == 0 - assert be_file.records_total == len(records) - assert be_file.records_since_last_reset == 0 - - -@pytest.mark.parametrize( - "records", - TEST_RECORDS, -) -def test_batch_export_temporary_file_write_records_to_csv(records): - """Test CSV written by BatchExportTemporaryFile match expected.""" - in_memory_file_obj = io.StringIO() - writer = csv.DictWriter( - in_memory_file_obj, - fieldnames=records[0].keys() if len(records) > 0 else [], - delimiter=",", - quotechar='"', - escapechar="\\", - lineterminator="\n", - quoting=csv.QUOTE_NONE, - ) - writer.writerows(records) - - with BatchExportTemporaryFile(mode="w+") as be_file: - be_file.write_records_to_csv(records) - - assert be_file.bytes_total == in_memory_file_obj.tell() - assert be_file.bytes_since_last_reset == in_memory_file_obj.tell() - assert be_file.records_total == len(records) - assert be_file.records_since_last_reset == len(records) - - be_file.seek(0) - reader = csv.reader( - be_file._file, - delimiter=",", - quotechar='"', - escapechar="\\", - quoting=csv.QUOTE_NONE, - ) - - rows = [row for row in reader] - assert len(rows) == len(records) - - for row_index, csv_record in enumerate(rows): - for value_index, value in enumerate(records[row_index].values()): - # Everything returned by csv.reader is a str. - # This means type information is lost when writing to CSV - # but this just a limitation of the format. - assert csv_record[value_index] == str(value) - - be_file.reset() - - assert be_file.bytes_total == in_memory_file_obj.tell() - assert be_file.bytes_since_last_reset == 0 - assert be_file.records_total == len(records) - assert be_file.records_since_last_reset == 0 - - -@pytest.mark.parametrize( - "records", - TEST_RECORDS, -) -def test_batch_export_temporary_file_write_records_to_tsv(records): - """Test TSV written by BatchExportTemporaryFile match expected.""" - in_memory_file_obj = io.StringIO() - writer = csv.DictWriter( - in_memory_file_obj, - fieldnames=records[0].keys() if len(records) > 0 else [], - delimiter="\t", - quotechar='"', - escapechar="\\", - lineterminator="\n", - quoting=csv.QUOTE_NONE, - ) - writer.writerows(records) - - with BatchExportTemporaryFile(mode="w+") as be_file: - be_file.write_records_to_tsv(records) - - assert be_file.bytes_total == in_memory_file_obj.tell() - assert be_file.bytes_since_last_reset == in_memory_file_obj.tell() - assert be_file.records_total == len(records) - assert be_file.records_since_last_reset == len(records) - - be_file.seek(0) - reader = csv.reader( - be_file._file, - delimiter="\t", - quotechar='"', - escapechar="\\", - quoting=csv.QUOTE_NONE, - ) - - rows = [row for row in reader] - assert len(rows) == len(records) - - for row_index, csv_record in enumerate(rows): - for value_index, value in enumerate(records[row_index].values()): - # Everything returned by csv.reader is a str. 
-            # This means type information is lost when writing to CSV
-            # but this just a limitation of the format.
-            assert csv_record[value_index] == str(value)
-
-    be_file.reset()
-
-    assert be_file.bytes_total == in_memory_file_obj.tell()
-    assert be_file.bytes_since_last_reset == 0
-    assert be_file.records_total == len(records)
-    assert be_file.records_since_last_reset == 0
diff --git a/posthog/temporal/tests/batch_exports/test_s3_batch_export_workflow.py b/posthog/temporal/tests/batch_exports/test_s3_batch_export_workflow.py
index e04e345d11245..e6583d049e2a8 100644
--- a/posthog/temporal/tests/batch_exports/test_s3_batch_export_workflow.py
+++ b/posthog/temporal/tests/batch_exports/test_s3_batch_export_workflow.py
@@ -10,10 +10,12 @@
 import aioboto3
 import botocore.exceptions
 import brotli
+import pyarrow.parquet as pq
 import pytest
 import pytest_asyncio
 from django.conf import settings
 from django.test import override_settings
+from pyarrow import fs
 from temporalio import activity
 from temporalio.client import WorkflowFailureError
 from temporalio.common import RetryPolicy
@@ -27,6 +29,7 @@
     update_export_run_status,
 )
 from posthog.temporal.batch_exports.s3_batch_export import (
+    FILE_FORMAT_EXTENSIONS,
     HeartbeatDetails,
     S3BatchExportInputs,
     S3BatchExportWorkflow,
@@ -107,6 +110,15 @@ def s3_key_prefix():
     return f"posthog-events-{str(uuid4())}"


+@pytest.fixture
+def file_format(request) -> str:
+    """S3 file format."""
+    try:
+        return request.param
+    except AttributeError:
+        return "JSONLines"
+
+
 async def delete_all_from_s3(minio_client, bucket_name: str, key_prefix: str):
     """Delete all objects in bucket_name under key_prefix."""
     response = await minio_client.list_objects_v2(Bucket=bucket_name, Prefix=key_prefix)
@@ -138,6 +150,61 @@ async def minio_client(bucket_name):
         await minio_client.delete_bucket(Bucket=bucket_name)


+async def read_parquet_from_s3(bucket_name: str, key: str, json_columns) -> list:
+    async with aioboto3.Session().client("sts") as sts:
+        try:
+            await sts.get_caller_identity()
+        except botocore.exceptions.NoCredentialsError:
+            s3 = fs.S3FileSystem(
+                access_key="object_storage_root_user",
+                secret_key="object_storage_root_password",
+                endpoint_override=settings.OBJECT_STORAGE_ENDPOINT,
+            )
+
+        else:
+            if os.getenv("S3_TEST_BUCKET") is not None:
+                s3 = fs.S3FileSystem()
+            else:
+                s3 = fs.S3FileSystem(
+                    access_key="object_storage_root_user",
+                    secret_key="object_storage_root_password",
+                    endpoint_override=settings.OBJECT_STORAGE_ENDPOINT,
+                )
+
+    table = pq.read_table(f"{bucket_name}/{key}", filesystem=s3)
+
+    parquet_data = []
+    for batch in table.to_batches():
+        for record in batch.to_pylist():
+            casted_record = {}
+            for k, v in record.items():
+                if isinstance(v, dt.datetime):
+                    # We read datetimes from ClickHouse as strings, but Parquet stores them as datetime objects.
+                    # To facilitate comparison, we isoformat the datetimes.
+                    casted_record[k] = v.isoformat()
+                elif k in json_columns and v is not None:
+                    # Parquet doesn't have a variable map type, so JSON fields are just strings.
+ casted_record[k] = json.loads(v) + else: + casted_record[k] = v + parquet_data.append(casted_record) + + return parquet_data + + +def read_s3_data_as_json(data: bytes, compression: str | None) -> list: + match compression: + case "gzip": + data = gzip.decompress(data) + case "brotli": + data = brotli.decompress(data) + case _: + pass + + json_data = [json.loads(line) for line in data.decode("utf-8").split("\n") if line] + return json_data + + async def assert_clickhouse_records_in_s3( s3_compatible_client, clickhouse_client: ClickHouseClient, @@ -150,6 +217,7 @@ async def assert_clickhouse_records_in_s3( include_events: list[str] | None = None, batch_export_schema: BatchExportSchema | None = None, compression: str | None = None, + file_format: str = "JSONLines", ): """Assert ClickHouse records are written to JSON in key_prefix in S3 bucket_name. @@ -175,28 +243,24 @@ async def assert_clickhouse_records_in_s3( # Get the object. key = objects["Contents"][0].get("Key") assert key - s3_object = await s3_compatible_client.get_object(Bucket=bucket_name, Key=key) - data = await s3_object["Body"].read() - # Check that the data is correct. - match compression: - case "gzip": - data = gzip.decompress(data) - case "brotli": - data = brotli.decompress(data) - case _: - pass + json_columns = ("properties", "person_properties", "set", "set_once") - json_data = [json.loads(line) for line in data.decode("utf-8").split("\n") if line] - # Pull out the fields we inserted only + if file_format == "Parquet": + s3_data = await read_parquet_from_s3(bucket_name, key, json_columns) + + elif file_format == "JSONLines": + s3_object = await s3_compatible_client.get_object(Bucket=bucket_name, Key=key) + data = await s3_object["Body"].read() + s3_data = read_s3_data_as_json(data, compression) + else: + raise ValueError(f"Unsupported file format: {file_format}") if batch_export_schema is not None: schema_column_names = [field["alias"] for field in batch_export_schema["fields"]] else: schema_column_names = [field["alias"] for field in s3_default_fields()] - json_columns = ("properties", "person_properties", "set", "set_once") - expected_records = [] for record_batch in iter_records( client=clickhouse_client, @@ -225,9 +289,9 @@ async def assert_clickhouse_records_in_s3( expected_records.append(expected_record) - assert len(json_data) == len(expected_records) - assert json_data[0] == expected_records[0] - assert json_data == expected_records + assert len(s3_data) == len(expected_records) + assert s3_data[0] == expected_records[0] + assert s3_data == expected_records TEST_S3_SCHEMAS: list[BatchExportSchema | None] = [ @@ -255,6 +319,7 @@ async def assert_clickhouse_records_in_s3( @pytest.mark.parametrize("compression", [None, "gzip", "brotli"], indirect=True) @pytest.mark.parametrize("exclude_events", [None, ["test-exclude"]], indirect=True) @pytest.mark.parametrize("batch_export_schema", TEST_S3_SCHEMAS) +@pytest.mark.parametrize("file_format", FILE_FORMAT_EXTENSIONS.keys()) async def test_insert_into_s3_activity_puts_data_into_s3( clickhouse_client, bucket_name, @@ -262,6 +327,7 @@ async def test_insert_into_s3_activity_puts_data_into_s3( activity_environment, compression, exclude_events, + file_format, batch_export_schema: BatchExportSchema | None, ): """Test that the insert_into_s3_activity function ends up with data into S3. 
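For reference, `read_s3_data_as_json` above is the read-side counterpart of the JSONL writers: optionally compressed, newline-delimited JSON, one object per line. A minimal round-trip sketch of that contract (a standalone illustration covering only the gzip branch, not the helper itself):

    import gzip
    import json


    def read_jsonl(data: bytes, compression: str | None) -> list:
        """Decompress if needed, then parse one JSON object per non-empty line."""
        if compression == "gzip":
            data = gzip.decompress(data)
        return [json.loads(line) for line in data.decode("utf-8").split("\n") if line]


    records = [{"event": "test-event-0"}, {"event": "test-event-1"}]
    blob = gzip.compress(b"\n".join(json.dumps(record).encode("utf-8") for record in records))
    assert read_jsonl(blob, "gzip") == records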
@@ -339,12 +405,15 @@ async def test_insert_into_s3_activity_puts_data_into_s3( compression=compression, exclude_events=exclude_events, batch_export_schema=batch_export_schema, + file_format=file_format, ) with override_settings( BATCH_EXPORT_S3_UPLOAD_CHUNK_SIZE_BYTES=5 * 1024**2 ): # 5MB, the minimum for Multipart uploads - await activity_environment.run(insert_into_s3_activity, insert_inputs) + records_total = await activity_environment.run(insert_into_s3_activity, insert_inputs) + + assert records_total == 10005 await assert_clickhouse_records_in_s3( s3_compatible_client=minio_client, @@ -358,6 +427,7 @@ async def test_insert_into_s3_activity_puts_data_into_s3( exclude_events=exclude_events, include_events=None, compression=compression, + file_format=file_format, ) @@ -371,6 +441,7 @@ async def s3_batch_export( exclude_events, temporal_client, encryption, + file_format, ): destination_data = { "type": "S3", @@ -385,6 +456,7 @@ async def s3_batch_export( "exclude_events": exclude_events, "encryption": encryption, "kms_key_id": os.getenv("S3_TEST_KMS_KEY_ID") if encryption == "aws:kms" else None, + "file_format": file_format, }, } @@ -410,6 +482,7 @@ async def s3_batch_export( @pytest.mark.parametrize("compression", [None, "gzip", "brotli"], indirect=True) @pytest.mark.parametrize("exclude_events", [None, ["test-exclude"]], indirect=True) @pytest.mark.parametrize("batch_export_schema", TEST_S3_SCHEMAS) +@pytest.mark.parametrize("file_format", FILE_FORMAT_EXTENSIONS.keys(), indirect=True) async def test_s3_export_workflow_with_minio_bucket( clickhouse_client, minio_client, @@ -421,6 +494,7 @@ async def test_s3_export_workflow_with_minio_bucket( exclude_events, s3_key_prefix, batch_export_schema, + file_format, ): """Test S3BatchExport Workflow end-to-end by using a local MinIO bucket instead of S3. @@ -508,6 +582,7 @@ async def test_s3_export_workflow_with_minio_bucket( batch_export_schema=batch_export_schema, exclude_events=exclude_events, compression=compression, + file_format=file_format, ) @@ -537,6 +612,7 @@ async def s3_client(bucket_name, s3_key_prefix): @pytest.mark.parametrize("encryption", [None, "AES256", "aws:kms"], indirect=True) @pytest.mark.parametrize("bucket_name", [os.getenv("S3_TEST_BUCKET")], indirect=True) @pytest.mark.parametrize("batch_export_schema", TEST_S3_SCHEMAS) +@pytest.mark.parametrize("file_format", FILE_FORMAT_EXTENSIONS.keys(), indirect=True) async def test_s3_export_workflow_with_s3_bucket( s3_client, clickhouse_client, @@ -549,6 +625,7 @@ async def test_s3_export_workflow_with_s3_bucket( exclude_events, ateam, batch_export_schema, + file_format, ): """Test S3 Export Workflow end-to-end by using an S3 bucket. 
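A note on the `file_format` fixture defined earlier: the `indirect=True` parametrizations route each value through the fixture via `request.param`, while tests that don't parametrize it hit the `AttributeError` fallback and get "JSONLines". A self-contained sketch of the pattern (the `codec` fixture and its values are illustrative, not part of this patch):

    import pytest


    @pytest.fixture
    def codec(request) -> str:
        # `request.param` is only set when a test parametrizes this fixture with `indirect=True`.
        try:
            return request.param
        except AttributeError:
            return "identity"


    @pytest.mark.parametrize("codec", ["gzip", "brotli"], indirect=True)
    def test_parametrized_codec(codec):
        assert codec in ("gzip", "brotli")


    def test_default_codec(codec):
        assert codec == "identity"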
@@ -646,6 +723,7 @@ async def test_s3_export_workflow_with_s3_bucket( exclude_events=exclude_events, include_events=None, compression=compression, + file_format=file_format, ) @@ -1206,6 +1284,49 @@ async def never_finish_activity(_: S3InsertInputs) -> str: ), "nested/prefix/2023-01-01 00:00:00-2023-01-01 01:00:00.jsonl.br", ), + ( + S3InsertInputs( + prefix="/nested/prefix/", + data_interval_start="2023-01-01 00:00:00", + data_interval_end="2023-01-01 01:00:00", + file_format="Parquet", + compression="snappy", + **base_inputs, # type: ignore + ), + "nested/prefix/2023-01-01 00:00:00-2023-01-01 01:00:00.parquet.sz", + ), + ( + S3InsertInputs( + prefix="/nested/prefix/", + data_interval_start="2023-01-01 00:00:00", + data_interval_end="2023-01-01 01:00:00", + file_format="Parquet", + **base_inputs, # type: ignore + ), + "nested/prefix/2023-01-01 00:00:00-2023-01-01 01:00:00.parquet", + ), + ( + S3InsertInputs( + prefix="/nested/prefix/", + data_interval_start="2023-01-01 00:00:00", + data_interval_end="2023-01-01 01:00:00", + compression="gzip", + file_format="Parquet", + **base_inputs, # type: ignore + ), + "nested/prefix/2023-01-01 00:00:00-2023-01-01 01:00:00.parquet.gz", + ), + ( + S3InsertInputs( + prefix="/nested/prefix/", + data_interval_start="2023-01-01 00:00:00", + data_interval_end="2023-01-01 01:00:00", + compression="brotli", + file_format="Parquet", + **base_inputs, # type: ignore + ), + "nested/prefix/2023-01-01 00:00:00-2023-01-01 01:00:00.parquet.br", + ), ], ) def test_get_s3_key(inputs, expected): @@ -1271,7 +1392,7 @@ def assert_heartbeat_details(*details): endpoint_url=settings.OBJECT_STORAGE_ENDPOINT, ) - with override_settings(BATCH_EXPORT_S3_UPLOAD_CHUNK_SIZE_BYTES=5 * 1024**2): + with override_settings(BATCH_EXPORT_S3_UPLOAD_CHUNK_SIZE_BYTES=1, CLICKHOUSE_MAX_BLOCK_SIZE_DEFAULT=1): await activity_environment.run(insert_into_s3_activity, insert_inputs) # This checks that the assert_heartbeat_details function was actually called. 
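The expected keys in the `test_get_s3_key` cases above compose as prefix, then the data interval, then a format extension, then a compression suffix, which is why Parquet with snappy ends in `.parquet.sz`. A sketch of that composition (the two mapping dicts are inferred from the expected keys and are not the module's actual constants):

    FORMAT_EXTENSIONS = {"JSONLines": "jsonl", "Parquet": "parquet"}  # assumed mapping
    COMPRESSION_EXTENSIONS = {"gzip": "gz", "snappy": "sz", "brotli": "br"}  # assumed mapping


    def compose_s3_key(prefix: str, start: str, end: str, file_format: str, compression: str | None) -> str:
        """Build an S3 key the way the expected values in test_get_s3_key read."""
        key = f"{prefix.strip('/')}/{start}-{end}.{FORMAT_EXTENSIONS[file_format]}"
        if compression is not None:
            key += f".{COMPRESSION_EXTENSIONS[compression]}"
        return key


    assert (
        compose_s3_key("/nested/prefix/", "2023-01-01 00:00:00", "2023-01-01 01:00:00", "Parquet", "snappy")
        == "nested/prefix/2023-01-01 00:00:00-2023-01-01 01:00:00.parquet.sz"
    )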
diff --git a/posthog/temporal/tests/batch_exports/test_temporary_file.py b/posthog/temporal/tests/batch_exports/test_temporary_file.py new file mode 100644 index 0000000000000..4fd7e69c0c12f --- /dev/null +++ b/posthog/temporal/tests/batch_exports/test_temporary_file.py @@ -0,0 +1,389 @@ +import csv +import datetime as dt +import io +import json + +import pyarrow as pa +import pyarrow.parquet as pq +import pytest + +from posthog.temporal.batch_exports.temporary_file import ( + BatchExportTemporaryFile, + CSVBatchExportWriter, + JSONLBatchExportWriter, + ParquetBatchExportWriter, + json_dumps_bytes, +) + + +@pytest.mark.parametrize( + "to_write", + [ + (b"",), + (b"", b""), + (b"12345",), + (b"12345", b"12345"), + (b"abbcccddddeeeee",), + (b"abbcccddddeeeee", b"abbcccddddeeeee"), + ], +) +def test_batch_export_temporary_file_tracks_bytes(to_write): + """Test the bytes written by BatchExportTemporaryFile match expected.""" + with BatchExportTemporaryFile() as be_file: + for content in to_write: + be_file.write(content) + + assert be_file.bytes_total == sum(len(content) for content in to_write) + assert be_file.bytes_since_last_reset == sum(len(content) for content in to_write) + + be_file.reset() + + assert be_file.bytes_total == sum(len(content) for content in to_write) + assert be_file.bytes_since_last_reset == 0 + + +TEST_RECORDS = [ + [], + [ + {"id": "record-1", "property": "value", "property_int": 1}, + {"id": "record-2", "property": "another-value", "property_int": 2}, + { + "id": "record-3", + "property": {"id": "nested-record", "property": "nested-value"}, + "property_int": 3, + }, + ], +] + + +@pytest.mark.parametrize( + "records", + TEST_RECORDS, +) +def test_batch_export_temporary_file_write_records_to_jsonl(records): + """Test JSONL records written by BatchExportTemporaryFile match expected.""" + jsonl_dump = b"\n".join(map(json_dumps_bytes, records)) + + with BatchExportTemporaryFile() as be_file: + be_file.write_records_to_jsonl(records) + + assert be_file.bytes_total == len(jsonl_dump) + assert be_file.bytes_since_last_reset == len(jsonl_dump) + assert be_file.records_total == len(records) + assert be_file.records_since_last_reset == len(records) + + be_file.seek(0) + lines = be_file.readlines() + assert len(lines) == len(records) + + for line_index, jsonl_record in enumerate(lines): + json_loaded = json.loads(jsonl_record) + assert json_loaded == records[line_index] + + be_file.reset() + + assert be_file.bytes_total == len(jsonl_dump) + assert be_file.bytes_since_last_reset == 0 + assert be_file.records_total == len(records) + assert be_file.records_since_last_reset == 0 + + +@pytest.mark.parametrize( + "records", + TEST_RECORDS, +) +def test_batch_export_temporary_file_write_records_to_csv(records): + """Test CSV written by BatchExportTemporaryFile match expected.""" + in_memory_file_obj = io.StringIO() + writer = csv.DictWriter( + in_memory_file_obj, + fieldnames=records[0].keys() if len(records) > 0 else [], + delimiter=",", + quotechar='"', + escapechar="\\", + lineterminator="\n", + quoting=csv.QUOTE_NONE, + ) + writer.writerows(records) + + with BatchExportTemporaryFile(mode="w+") as be_file: + be_file.write_records_to_csv(records) + + assert be_file.bytes_total == in_memory_file_obj.tell() + assert be_file.bytes_since_last_reset == in_memory_file_obj.tell() + assert be_file.records_total == len(records) + assert be_file.records_since_last_reset == len(records) + + be_file.seek(0) + reader = csv.reader( + be_file._file, + delimiter=",", + quotechar='"', + 
escapechar="\\", + quoting=csv.QUOTE_NONE, + ) + + rows = [row for row in reader] + assert len(rows) == len(records) + + for row_index, csv_record in enumerate(rows): + for value_index, value in enumerate(records[row_index].values()): + # Everything returned by csv.reader is a str. + # This means type information is lost when writing to CSV + # but this just a limitation of the format. + assert csv_record[value_index] == str(value) + + be_file.reset() + + assert be_file.bytes_total == in_memory_file_obj.tell() + assert be_file.bytes_since_last_reset == 0 + assert be_file.records_total == len(records) + assert be_file.records_since_last_reset == 0 + + +@pytest.mark.parametrize( + "records", + TEST_RECORDS, +) +def test_batch_export_temporary_file_write_records_to_tsv(records): + """Test TSV written by BatchExportTemporaryFile match expected.""" + in_memory_file_obj = io.StringIO() + writer = csv.DictWriter( + in_memory_file_obj, + fieldnames=records[0].keys() if len(records) > 0 else [], + delimiter="\t", + quotechar='"', + escapechar="\\", + lineterminator="\n", + quoting=csv.QUOTE_NONE, + ) + writer.writerows(records) + + with BatchExportTemporaryFile(mode="w+") as be_file: + be_file.write_records_to_tsv(records) + + assert be_file.bytes_total == in_memory_file_obj.tell() + assert be_file.bytes_since_last_reset == in_memory_file_obj.tell() + assert be_file.records_total == len(records) + assert be_file.records_since_last_reset == len(records) + + be_file.seek(0) + reader = csv.reader( + be_file._file, + delimiter="\t", + quotechar='"', + escapechar="\\", + quoting=csv.QUOTE_NONE, + ) + + rows = [row for row in reader] + assert len(rows) == len(records) + + for row_index, csv_record in enumerate(rows): + for value_index, value in enumerate(records[row_index].values()): + # Everything returned by csv.reader is a str. + # This means type information is lost when writing to CSV + # but this just a limitation of the format. 
+            assert csv_record[value_index] == str(value)
+
+    be_file.reset()
+
+    assert be_file.bytes_total == in_memory_file_obj.tell()
+    assert be_file.bytes_since_last_reset == 0
+    assert be_file.records_total == len(records)
+    assert be_file.records_since_last_reset == 0
+
+
+TEST_RECORD_BATCHES = [
+    pa.RecordBatch.from_pydict(
+        {
+            "event": pa.array(["test-event-0", "test-event-1", "test-event-2"]),
+            "properties": pa.array(['{"prop_0": 1, "prop_1": 2}', "{}", "null"]),
+            "_inserted_at": pa.array([0, 1, 2]),
+        }
+    )
+]
+
+
+@pytest.mark.parametrize(
+    "record_batch",
+    TEST_RECORD_BATCHES,
+)
+@pytest.mark.asyncio
+async def test_jsonl_writer_writes_record_batches(record_batch):
+    """Test record batches are written as valid JSONL."""
+    in_memory_file_obj = io.BytesIO()
+    inserted_ats_seen = []
+
+    async def store_in_memory_on_flush(
+        batch_export_file, records_since_last_flush, bytes_since_last_flush, last_inserted_at, is_last
+    ):
+        in_memory_file_obj.write(batch_export_file.read())
+        inserted_ats_seen.append(last_inserted_at)
+
+    writer = JSONLBatchExportWriter(max_bytes=1, flush_callable=store_in_memory_on_flush)
+
+    record_batch = record_batch.sort_by("_inserted_at")
+    async with writer.open_temporary_file():
+        await writer.write_record_batch(record_batch)
+
+    # Rewind the in-memory file, otherwise reading from it yields nothing.
+    in_memory_file_obj.seek(0)
+
+    lines = in_memory_file_obj.readlines()
+    for index, line in enumerate(lines):
+        written_jsonl = json.loads(line)
+
+        single_record_batch = record_batch.slice(offset=index, length=1)
+        # The writer drops "_inserted_at" before writing, so drop it from the expected record too.
+        expected_jsonl = {
+            key: value for key, value in single_record_batch.to_pylist()[0].items() if key != "_inserted_at"
+        }
+
+        assert "_inserted_at" not in written_jsonl
+        assert written_jsonl == expected_jsonl
+
+    assert inserted_ats_seen == [record_batch.column("_inserted_at")[-1].as_py()]
+
+
+@pytest.mark.parametrize(
+    "record_batch",
+    TEST_RECORD_BATCHES,
+)
+@pytest.mark.asyncio
+async def test_csv_writer_writes_record_batches(record_batch):
+    """Test record batches are written as valid CSV."""
+    in_memory_file_obj = io.StringIO()
+    inserted_ats_seen = []
+
+    async def store_in_memory_on_flush(
+        batch_export_file, records_since_last_flush, bytes_since_last_flush, last_inserted_at, is_last
+    ):
+        in_memory_file_obj.write(batch_export_file.read().decode("utf-8"))
+        inserted_ats_seen.append(last_inserted_at)
+
+    schema_columns = [column_name for column_name in record_batch.column_names if column_name != "_inserted_at"]
+    writer = CSVBatchExportWriter(max_bytes=1, field_names=schema_columns, flush_callable=store_in_memory_on_flush)
+
+    record_batch = record_batch.sort_by("_inserted_at")
+    async with writer.open_temporary_file():
+        await writer.write_record_batch(record_batch)
+
+    # Rewind the in-memory file, otherwise reading from it yields nothing.
+    in_memory_file_obj.seek(0)
+
+    reader = csv.reader(
+        in_memory_file_obj,
+        delimiter=",",
+        quotechar='"',
+        escapechar="\\",
+        quoting=csv.QUOTE_NONE,
+    )
+    for index, written_csv_row in enumerate(reader):
+        single_record_batch = record_batch.slice(offset=index, length=1)
+        # csv.reader yields lists of strings, so stringify the expected values and drop "_inserted_at".
+        expected_csv_row = [
+            str(value) for key, value in single_record_batch.to_pylist()[0].items() if key != "_inserted_at"
+        ]
+
+        assert "_inserted_at" not in written_csv_row
+        assert written_csv_row == expected_csv_row
+
+    assert inserted_ats_seen == [record_batch.column("_inserted_at")[-1].as_py()]
+
+
+@pytest.mark.parametrize(
+    "record_batch",
+    TEST_RECORD_BATCHES,
+)
+@pytest.mark.asyncio
+async def test_parquet_writer_writes_record_batches(record_batch):
+    """Test record batches are written as valid Parquet."""
+    in_memory_file_obj = io.BytesIO()
+    inserted_ats_seen = []
+
+    async def store_in_memory_on_flush(
+        batch_export_file, records_since_last_flush, bytes_since_last_flush, last_inserted_at, is_last
+    ):
+        in_memory_file_obj.write(batch_export_file.read())
+
inserted_ats_seen.append(last_inserted_at) + + schema_columns = [column_name for column_name in record_batch.column_names if column_name != "_inserted_at"] + + writer = ParquetBatchExportWriter( + max_bytes=1, + flush_callable=store_in_memory_on_flush, + schema=record_batch.select(schema_columns).schema, + ) + + record_batch = record_batch.sort_by("_inserted_at") + async with writer.open_temporary_file(): + await writer.write_record_batch(record_batch) + + written_parquet = pq.read_table(in_memory_file_obj) + + for index, written_row_as_dict in enumerate(written_parquet.to_pylist()): + single_record_batch = record_batch.slice(offset=index, length=1) + expected_row_as_dict = single_record_batch.select(schema_columns).to_pylist()[0] + + assert "_inserted_at" not in written_row_as_dict + assert written_row_as_dict == expected_row_as_dict + + # NOTE: Parquet gets flushed twice due to the extra flush at the end for footer bytes, so our mock function + # will see this value twice. + assert inserted_ats_seen == [ + record_batch.column("_inserted_at")[-1].as_py(), + record_batch.column("_inserted_at")[-1].as_py(), + ] + + +@pytest.mark.parametrize( + "record_batch", + TEST_RECORD_BATCHES, +) +@pytest.mark.asyncio +async def test_writing_out_of_scope_of_temporary_file_raises(record_batch): + """Test attempting a write out of temporary file scope raises a `ValueError`.""" + + async def do_nothing(*args, **kwargs): + pass + + schema_columns = [column_name for column_name in record_batch.column_names if column_name != "_inserted_at"] + writer = ParquetBatchExportWriter( + max_bytes=10, + flush_callable=do_nothing, + schema=record_batch.select(schema_columns).schema, + ) + + async with writer.open_temporary_file(): + pass + + with pytest.raises(ValueError, match="Batch export file is closed"): + await writer.write_record_batch(record_batch) + + +@pytest.mark.parametrize( + "record_batch", + TEST_RECORD_BATCHES, +) +@pytest.mark.asyncio +async def test_flushing_parquet_writer_resets_underlying_file(record_batch): + """Test flushing a writer resets underlying file.""" + flush_counter = 0 + + async def track_flushes(*args, **kwargs): + nonlocal flush_counter + flush_counter += 1 + + schema_columns = [column_name for column_name in record_batch.column_names if column_name != "_inserted_at"] + writer = ParquetBatchExportWriter( + max_bytes=10000000, + flush_callable=track_flushes, + schema=record_batch.select(schema_columns).schema, + ) + + async with writer.open_temporary_file(): + await writer.write_record_batch(record_batch) + + assert writer.batch_export_file.tell() > 0 + assert writer.bytes_since_last_flush > 0 + assert writer.bytes_since_last_flush == writer.batch_export_file.bytes_since_last_reset + assert writer.records_since_last_flush == record_batch.num_rows + + await writer.flush(dt.datetime.now()) + + assert flush_counter == 1 + assert writer.batch_export_file.tell() == 0 + assert writer.bytes_since_last_flush == 0 + assert writer.bytes_since_last_flush == writer.batch_export_file.bytes_since_last_reset + assert writer.records_since_last_flush == 0 + + assert flush_counter == 2 From fcc192e83db4273e0904993922cba9a62c2d3752 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Far=C3=ADas=20Santana?= Date: Fri, 22 Mar 2024 14:51:00 +0100 Subject: [PATCH 14/15] feat: Frontend support for file_format in S3 batch export (#21076) * refactor: Support for multiple file formats in batch exports * refactor: Prefer composition over inheritance * refactor: More clearly separate writer from temporary file 
We should now be more explicit about the context in which
the batch export temporary file is alive. The writer outlives
this context, so callers can still use it to, for example,
check how many records were written.

* test: More parquet testing

* refactor: Move temporary file to new module

* feat: Frontend support for file_format

* fix: Remove redefinition caused by rebase

---
 .../src/scenes/batch_exports/BatchExportEditForm.tsx  | 9 +++++++++
 .../src/scenes/batch_exports/BatchExports.stories.tsx | 1 +
 .../src/scenes/batch_exports/batchExportEditLogic.ts  | 1 +
 frontend/src/types.ts                                 | 1 +
 4 files changed, 12 insertions(+)

diff --git a/frontend/src/scenes/batch_exports/BatchExportEditForm.tsx b/frontend/src/scenes/batch_exports/BatchExportEditForm.tsx
index b015659cfaef1..a2a9f9968f82c 100644
--- a/frontend/src/scenes/batch_exports/BatchExportEditForm.tsx
+++ b/frontend/src/scenes/batch_exports/BatchExportEditForm.tsx
@@ -242,6 +242,15 @@ export function BatchExportsEditFields({
                 ]}
             />
         </LemonField>
+
+        <LemonField name="file_format" label="Format">
+            <LemonSelect
+                options={[
+                    { value: 'JSONLines', label: 'JSON lines' },
+                    { value: 'Parquet', label: 'Parquet' },
+                ]}
+            />
+        </LemonField>
diff --git a/frontend/src/scenes/batch_exports/BatchExports.stories.tsx b/frontend/src/scenes/batch_exports/BatchExports.stories.tsx index 0dd616c44982a..dbd6779cb208d 100644 --- a/frontend/src/scenes/batch_exports/BatchExports.stories.tsx +++ b/frontend/src/scenes/batch_exports/BatchExports.stories.tsx @@ -42,6 +42,7 @@ export default { include_events: [], encryption: null, kms_key_id: null, + file_format: 'JSONLines', }, }, start_at: null, diff --git a/frontend/src/scenes/batch_exports/batchExportEditLogic.ts b/frontend/src/scenes/batch_exports/batchExportEditLogic.ts index bc86d1618fe4f..30c123256d81a 100644 --- a/frontend/src/scenes/batch_exports/batchExportEditLogic.ts +++ b/frontend/src/scenes/batch_exports/batchExportEditLogic.ts @@ -90,6 +90,7 @@ export const batchExportFormFields = ( aws_secret_access_key: isNew ? (!config.aws_secret_access_key ? 'This field is required' : '') : '', compression: '', encryption: '', + file_format: isNew ? (!config.file_format ? 'This field is required' : '') : '', kms_key_id: !config.kms_key_id && config.encryption == 'aws:kms' ? 'This field is required' : '', exclude_events: '', include_events: '', diff --git a/frontend/src/types.ts b/frontend/src/types.ts index be96494253ae8..58f54b786a7df 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -3597,6 +3597,7 @@ export type BatchExportDestinationS3 = { encryption: string | null kms_key_id: string | null endpoint_url: string | null + file_format: string } } From 17c5876ef880f3771af2f4a08f88b01e9bb752a9 Mon Sep 17 00:00:00 2001 From: Xavier Vello Date: Fri, 22 Mar 2024 15:05:54 +0100 Subject: [PATCH 15/15] chore(blobby): raise overflow trigger again (#21104) --- plugin-server/src/config/config.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index dcaebe4c1097a..def72eea474bb 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -164,8 +164,8 @@ export function getDefaultConfig(): PluginsServerConfig { SESSION_RECORDING_KAFKA_DEBUG: undefined, SESSION_RECORDING_MAX_PARALLEL_FLUSHES: 10, SESSION_RECORDING_OVERFLOW_ENABLED: false, - SESSION_RECORDING_OVERFLOW_BUCKET_REPLENISH_RATE: 2_000_000, // 2MB/second uncompressed, sustained - SESSION_RECORDING_OVERFLOW_BUCKET_CAPACITY: 100_000_000, // 100MB burst + SESSION_RECORDING_OVERFLOW_BUCKET_REPLENISH_RATE: 5_000_000, // 5MB/second uncompressed, sustained + SESSION_RECORDING_OVERFLOW_BUCKET_CAPACITY: 200_000_000, // 200MB burst } }
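The raised limits above parameterize a token bucket: the replenish rate caps sustained uncompressed throughput per session, while the capacity absorbs bursts before recordings are routed to the overflow topic. A minimal sketch of those semantics (an illustration of what the two settings mean, not the plugin-server's actual limiter):

    import time


    class TokenBucket:
        """Allow `capacity` bytes of burst, refilled at `replenish_rate` bytes per second."""

        def __init__(self, capacity: float, replenish_rate: float):
            self.capacity = capacity
            self.replenish_rate = replenish_rate
            self.tokens = capacity
            self.last = time.monotonic()

        def consume(self, n_bytes: float) -> bool:
            # Refill proportionally to elapsed time, capped at the bucket capacity.
            now = time.monotonic()
            self.tokens = min(self.capacity, self.tokens + (now - self.last) * self.replenish_rate)
            self.last = now
            if self.tokens >= n_bytes:
                self.tokens -= n_bytes
                return True
            return False  # over budget: the session would overflow


    # With the new defaults, a 200MB burst is absorbed, after which roughly 5MB/s is sustainable.
    bucket = TokenBucket(capacity=200_000_000, replenish_rate=5_000_000)
    assert bucket.consume(200_000_000) is True
    assert bucket.consume(10_000_000) is False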