From 82d22cc7e2e4d3d6ddc7c228082938d0830c77e6 Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Thu, 14 Dec 2023 09:42:56 +0100 Subject: [PATCH] feat(trends): support numeric breakdowns (#19270) --- frontend/src/scenes/insights/utils.tsx | 9 ++- .../views/InsightsTable/InsightsTable.tsx | 3 + frontend/src/scenes/trends/trendsDataLogic.ts | 8 ++- posthog/queries/trends/breakdown.py | 19 ++++-- .../test/__snapshots__/test_breakdowns.ambr | 67 ++++++++++++++++++- .../queries/trends/test/test_breakdowns.py | 32 +++++++-- 6 files changed, 125 insertions(+), 13 deletions(-) diff --git a/frontend/src/scenes/insights/utils.tsx b/frontend/src/scenes/insights/utils.tsx index 5eeff4601448f..313857cafa5d6 100644 --- a/frontend/src/scenes/insights/utils.tsx +++ b/frontend/src/scenes/insights/utils.tsx @@ -214,6 +214,10 @@ export function formatAggregationValue( return Array.isArray(formattedValue) ? formattedValue[0] : formattedValue } +// NB! Sync this with breakdown.py +export const BREAKDOWN_OTHER_STRING_LABEL = '$$_posthog_breakdown_other_$$' +export const BREAKDOWN_OTHER_NUMERIC_LABEL = 9007199254740991 // pow(2, 53) - 1 + export function formatBreakdownLabel( cohorts: CohortType[] | undefined, formatPropertyValueForDisplay: FormatPropertyValueForDisplayFunction | undefined, @@ -249,11 +253,14 @@ export function formatBreakdownLabel( } return cohorts?.filter((c) => c.id == breakdown_value)[0]?.name ?? (breakdown_value || '').toString() } else if (typeof breakdown_value == 'number') { + if (breakdown_value === BREAKDOWN_OTHER_NUMERIC_LABEL) { + return 'Other' + } return formatPropertyValueForDisplay ? formatPropertyValueForDisplay(breakdown, breakdown_value)?.toString() ?? 'None' : breakdown_value.toString() } else if (typeof breakdown_value == 'string') { - return breakdown_value === 'nan' || breakdown_value === '$$_posthog_breakdown_other_$$' + return breakdown_value === 'nan' || breakdown_value === BREAKDOWN_OTHER_STRING_LABEL ? 'Other' : breakdown_value === '' ? 'None' diff --git a/frontend/src/scenes/insights/views/InsightsTable/InsightsTable.tsx b/frontend/src/scenes/insights/views/InsightsTable/InsightsTable.tsx index 004bf853b61ca..965e19b7d278b 100644 --- a/frontend/src/scenes/insights/views/InsightsTable/InsightsTable.tsx +++ b/frontend/src/scenes/insights/views/InsightsTable/InsightsTable.tsx @@ -153,6 +153,9 @@ export function InsightsTable({ ), key: 'breakdown', sorter: (a, b) => { + if (typeof a.breakdown_value === 'number' && typeof b.breakdown_value === 'number') { + return a.breakdown_value - b.breakdown_value + } const labelA = formatItemBreakdownLabel(a) const labelB = formatItemBreakdownLabel(b) return labelA.localeCompare(labelB) diff --git a/frontend/src/scenes/trends/trendsDataLogic.ts b/frontend/src/scenes/trends/trendsDataLogic.ts index 9343106b51849..a08ea03d70865 100644 --- a/frontend/src/scenes/trends/trendsDataLogic.ts +++ b/frontend/src/scenes/trends/trendsDataLogic.ts @@ -3,6 +3,7 @@ import api from 'lib/api' import { dayjs } from 'lib/dayjs' import { insightVizDataLogic } from 'scenes/insights/insightVizDataLogic' import { keyForInsightLogicProps } from 'scenes/insights/sharedUtils' +import { BREAKDOWN_OTHER_NUMERIC_LABEL, BREAKDOWN_OTHER_STRING_LABEL } from 'scenes/insights/utils' import { ChartDisplayType, InsightLogicProps, LifecycleToggle, TrendAPIResponse, TrendResult } from '~/types' @@ -85,7 +86,12 @@ export const trendsDataLogic = kea([ } const results = insightData.result ?? insightData.results return !!( - Array.isArray(results) && results.find((r) => r.breakdown_value === '$$_posthog_breakdown_other_$$') + Array.isArray(results) && + results.find( + (r) => + r.breakdown_value === BREAKDOWN_OTHER_STRING_LABEL || + r.breakdown_value === BREAKDOWN_OTHER_NUMERIC_LABEL + ) ) }, ], diff --git a/posthog/queries/trends/breakdown.py b/posthog/queries/trends/breakdown.py index 34335e636f9ae..51f27d6bf1e0b 100644 --- a/posthog/queries/trends/breakdown.py +++ b/posthog/queries/trends/breakdown.py @@ -81,6 +81,9 @@ ) from posthog.queries.person_on_events_v2_sql import PERSON_OVERRIDES_JOIN_SQL +BREAKDOWN_OTHER_STRING_LABEL = "$$_posthog_breakdown_other_$$" +BREAKDOWN_OTHER_NUMERIC_LABEL = 9007199254740991 # pow(2, 53) - 1, for JS compatibility + class TrendsBreakdown: DISTINCT_ID_TABLE_ALIAS = EventQuery.DISTINCT_ID_TABLE_ALIAS @@ -448,6 +451,7 @@ def _breakdown_prop_params(self, aggregate_operation: str, math_params: Dict): assert isinstance(self.filter.breakdown, str) breakdown_value = self._get_breakdown_value(self.filter.breakdown) + breakdown_other_value: str | int = BREAKDOWN_OTHER_STRING_LABEL numeric_property_filter = "" if self.filter.using_histogram: numeric_property_filter = f"AND {breakdown_value} is not null" @@ -457,13 +461,18 @@ def _breakdown_prop_params(self, aggregate_operation: str, math_params: Dict): # Not adding "Other" for the custom session duration filter. pass else: - # Adding "Other" breakdown option - breakdown_value = ( - f"transform({breakdown_value}, (%(values)s), (%(values)s), '$$_posthog_breakdown_other_$$')" - ) + all_values_are_numeric = all(isinstance(value, int) or isinstance(value, float) for value in values_arr) + all_values_are_string = all(isinstance(value, str) for value in values_arr) + + if all_values_are_numeric: + breakdown_other_value = BREAKDOWN_OTHER_NUMERIC_LABEL + elif not all_values_are_string: + breakdown_value = f"toString({breakdown_value})" + + breakdown_value = f"transform({breakdown_value}, (%(values)s), (%(values)s), %(other_value)s)" return ( - {"values": values_arr}, + {"values": values_arr, "other_value": breakdown_other_value}, BREAKDOWN_PROP_JOIN_SQL if not self.filter.using_histogram else BREAKDOWN_HISTOGRAM_PROP_JOIN_SQL, { "breakdown_value_expr": breakdown_value, diff --git a/posthog/queries/trends/test/__snapshots__/test_breakdowns.ambr b/posthog/queries/trends/test/__snapshots__/test_breakdowns.ambr index e73af7d866008..35b2ecc156fe6 100644 --- a/posthog/queries/trends/test/__snapshots__/test_breakdowns.ambr +++ b/posthog/queries/trends/test/__snapshots__/test_breakdowns.ambr @@ -181,13 +181,13 @@ CROSS JOIN (SELECT breakdown_value FROM - (SELECT ['', 'https://example.com'] as breakdown_value) ARRAY + (SELECT ['', 'https://example.com/', 'https://example.com'] as breakdown_value) ARRAY JOIN breakdown_value) as sec ORDER BY breakdown_value, day_start UNION ALL SELECT count(*) as total, toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - transform(replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''), (['', 'https://example.com']), (['', 'https://example.com']), '$$_posthog_breakdown_other_$$') as breakdown_value + transform(replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''), (['', 'https://example.com/', 'https://example.com']), (['', 'https://example.com/', 'https://example.com']), '$$_posthog_breakdown_other_$$') as breakdown_value FROM events e INNER JOIN (SELECT "$session_id", @@ -660,3 +660,66 @@ ORDER BY breakdown_value ' --- +# name: TestBreakdowns.test_breakdown_numeric_hogql_to_string + ' + + SELECT groupArray(value) + FROM + (SELECT length(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', '')) AS value, + count(*) as count + FROM events e + WHERE team_id = 2 + AND event = 'watched movie' + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-02 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 25 + OFFSET 0) + ' +--- +# name: TestBreakdowns.test_breakdown_numeric_hogql_to_string.1 + ' + + SELECT groupArray(day_start) as date, + groupArray(count) AS total, + breakdown_value + FROM + (SELECT SUM(total) as count, + day_start, + breakdown_value + FROM + (SELECT * + FROM + (SELECT toUInt16(0) AS total, + ticks.day_start as day_start, + breakdown_value + FROM + (SELECT toStartOfDay(toDateTime('2020-01-12 23:59:59', 'UTC')) - toIntervalDay(number) as day_start + FROM numbers(11) + UNION ALL SELECT toStartOfDay(toDateTime('2020-01-02 00:00:00', 'UTC')) as day_start) as ticks + CROSS JOIN + (SELECT breakdown_value + FROM + (SELECT [1, 2] as breakdown_value) ARRAY + JOIN breakdown_value) as sec + ORDER BY breakdown_value, + day_start + UNION ALL SELECT count(*) as total, + toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, + transform(length(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', '')), ([19, 20]), ([19, 20]), 9007199254740991) as breakdown_value + FROM events e + WHERE e.team_id = 2 + AND event = 'watched movie' + AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-02 00:00:00', 'UTC')), 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC') + GROUP BY day_start, + breakdown_value)) + GROUP BY day_start, + breakdown_value + ORDER BY breakdown_value, + day_start) + GROUP BY breakdown_value + ORDER BY breakdown_value + ' +--- diff --git a/posthog/queries/trends/test/test_breakdowns.py b/posthog/queries/trends/test/test_breakdowns.py index 29c917e4dd843..b9856947f4dc3 100644 --- a/posthog/queries/trends/test/test_breakdowns.py +++ b/posthog/queries/trends/test/test_breakdowns.py @@ -3,6 +3,7 @@ from posthog.constants import TRENDS_TABLE from posthog.models import Filter +from posthog.queries.trends.breakdown import BREAKDOWN_OTHER_NUMERIC_LABEL from posthog.queries.trends.trends import Trends from posthog.test.base import ( APIBaseTest, @@ -45,7 +46,7 @@ def setUp(self): "properties": { "$session_id": "2", "movie_length": 75, - "$current_url": "https://example.com", + "$current_url": "https://example.com/", }, }, ], @@ -117,7 +118,8 @@ def _run(self, extra: Dict = {}, events_extra: Dict = {}): "date_from": "2020-01-02T00:00:00Z", "date_to": "2020-01-12T00:00:00Z", **extra, - } + }, + team=self.team, ), self.team, ) @@ -458,8 +460,13 @@ def test_breakdown_by_event_property_with_entity_session_filter(self): ("", 6.0, [1.0, 0.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), ( "https://example.com", - 2.0, - [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + 1.0, + [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ), + ( + "https://example.com/", + 1.0, + [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ), ], ) @@ -484,3 +491,20 @@ def test_breakdown_histogram_by_missing_property_regression(self): ), ], ) + + @snapshot_clickhouse_queries + def test_breakdown_numeric_hogql_to_string(self): + response = self._run( + { + "breakdown": "length(properties.$current_url)", + "breakdown_type": "hogql", + }, + ) + self.assertEqual( + [(item["breakdown_value"], item["count"], item["data"]) for item in response], + [ + (BREAKDOWN_OTHER_NUMERIC_LABEL, 6.0, [1.0, 1.0, 4.0]), + (19, 2.0, [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), + (20, 1.0, [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), + ], + )