Skip to content

Commit

Permalink
feat(trends): support numeric breakdowns (#19270)
Browse files Browse the repository at this point in the history
  • Loading branch information
mariusandra authored Dec 14, 2023
1 parent 313a7ec commit 82d22cc
Show file tree
Hide file tree
Showing 6 changed files with 125 additions and 13 deletions.
9 changes: 8 additions & 1 deletion frontend/src/scenes/insights/utils.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ export function formatAggregationValue(
return Array.isArray(formattedValue) ? formattedValue[0] : formattedValue
}

// NB! Sync this with breakdown.py
export const BREAKDOWN_OTHER_STRING_LABEL = '$$_posthog_breakdown_other_$$'
export const BREAKDOWN_OTHER_NUMERIC_LABEL = 9007199254740991 // pow(2, 53) - 1

export function formatBreakdownLabel(
cohorts: CohortType[] | undefined,
formatPropertyValueForDisplay: FormatPropertyValueForDisplayFunction | undefined,
Expand Down Expand Up @@ -249,11 +253,14 @@ export function formatBreakdownLabel(
}
return cohorts?.filter((c) => c.id == breakdown_value)[0]?.name ?? (breakdown_value || '').toString()
} else if (typeof breakdown_value == 'number') {
if (breakdown_value === BREAKDOWN_OTHER_NUMERIC_LABEL) {
return 'Other'
}
return formatPropertyValueForDisplay
? formatPropertyValueForDisplay(breakdown, breakdown_value)?.toString() ?? 'None'
: breakdown_value.toString()
} else if (typeof breakdown_value == 'string') {
return breakdown_value === 'nan' || breakdown_value === '$$_posthog_breakdown_other_$$'
return breakdown_value === 'nan' || breakdown_value === BREAKDOWN_OTHER_STRING_LABEL
? 'Other'
: breakdown_value === ''
? 'None'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,9 @@ export function InsightsTable({
),
key: 'breakdown',
sorter: (a, b) => {
if (typeof a.breakdown_value === 'number' && typeof b.breakdown_value === 'number') {
return a.breakdown_value - b.breakdown_value
}
const labelA = formatItemBreakdownLabel(a)
const labelB = formatItemBreakdownLabel(b)
return labelA.localeCompare(labelB)
Expand Down
8 changes: 7 additions & 1 deletion frontend/src/scenes/trends/trendsDataLogic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import api from 'lib/api'
import { dayjs } from 'lib/dayjs'
import { insightVizDataLogic } from 'scenes/insights/insightVizDataLogic'
import { keyForInsightLogicProps } from 'scenes/insights/sharedUtils'
import { BREAKDOWN_OTHER_NUMERIC_LABEL, BREAKDOWN_OTHER_STRING_LABEL } from 'scenes/insights/utils'

import { ChartDisplayType, InsightLogicProps, LifecycleToggle, TrendAPIResponse, TrendResult } from '~/types'

Expand Down Expand Up @@ -85,7 +86,12 @@ export const trendsDataLogic = kea<trendsDataLogicType>([
}
const results = insightData.result ?? insightData.results
return !!(
Array.isArray(results) && results.find((r) => r.breakdown_value === '$$_posthog_breakdown_other_$$')
Array.isArray(results) &&
results.find(
(r) =>
r.breakdown_value === BREAKDOWN_OTHER_STRING_LABEL ||
r.breakdown_value === BREAKDOWN_OTHER_NUMERIC_LABEL
)
)
},
],
Expand Down
19 changes: 14 additions & 5 deletions posthog/queries/trends/breakdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@
)
from posthog.queries.person_on_events_v2_sql import PERSON_OVERRIDES_JOIN_SQL

BREAKDOWN_OTHER_STRING_LABEL = "$$_posthog_breakdown_other_$$"
BREAKDOWN_OTHER_NUMERIC_LABEL = 9007199254740991 # pow(2, 53) - 1, for JS compatibility


class TrendsBreakdown:
DISTINCT_ID_TABLE_ALIAS = EventQuery.DISTINCT_ID_TABLE_ALIAS
Expand Down Expand Up @@ -448,6 +451,7 @@ def _breakdown_prop_params(self, aggregate_operation: str, math_params: Dict):
assert isinstance(self.filter.breakdown, str)

breakdown_value = self._get_breakdown_value(self.filter.breakdown)
breakdown_other_value: str | int = BREAKDOWN_OTHER_STRING_LABEL
numeric_property_filter = ""
if self.filter.using_histogram:
numeric_property_filter = f"AND {breakdown_value} is not null"
Expand All @@ -457,13 +461,18 @@ def _breakdown_prop_params(self, aggregate_operation: str, math_params: Dict):
# Not adding "Other" for the custom session duration filter.
pass
else:
# Adding "Other" breakdown option
breakdown_value = (
f"transform({breakdown_value}, (%(values)s), (%(values)s), '$$_posthog_breakdown_other_$$')"
)
all_values_are_numeric = all(isinstance(value, int) or isinstance(value, float) for value in values_arr)
all_values_are_string = all(isinstance(value, str) for value in values_arr)

if all_values_are_numeric:
breakdown_other_value = BREAKDOWN_OTHER_NUMERIC_LABEL
elif not all_values_are_string:
breakdown_value = f"toString({breakdown_value})"

breakdown_value = f"transform({breakdown_value}, (%(values)s), (%(values)s), %(other_value)s)"

return (
{"values": values_arr},
{"values": values_arr, "other_value": breakdown_other_value},
BREAKDOWN_PROP_JOIN_SQL if not self.filter.using_histogram else BREAKDOWN_HISTOGRAM_PROP_JOIN_SQL,
{
"breakdown_value_expr": breakdown_value,
Expand Down
67 changes: 65 additions & 2 deletions posthog/queries/trends/test/__snapshots__/test_breakdowns.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -181,13 +181,13 @@
CROSS JOIN
(SELECT breakdown_value
FROM
(SELECT ['', 'https://example.com'] as breakdown_value) ARRAY
(SELECT ['', 'https://example.com/', 'https://example.com'] as breakdown_value) ARRAY
JOIN breakdown_value) as sec
ORDER BY breakdown_value,
day_start
UNION ALL SELECT count(*) as total,
toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start,
transform(replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''), (['', 'https://example.com']), (['', 'https://example.com']), '$$_posthog_breakdown_other_$$') as breakdown_value
transform(replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''), (['', 'https://example.com/', 'https://example.com']), (['', 'https://example.com/', 'https://example.com']), '$$_posthog_breakdown_other_$$') as breakdown_value
FROM events e
INNER JOIN
(SELECT "$session_id",
Expand Down Expand Up @@ -660,3 +660,66 @@
ORDER BY breakdown_value
'
---
# name: TestBreakdowns.test_breakdown_numeric_hogql_to_string
'

SELECT groupArray(value)
FROM
(SELECT length(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', '')) AS value,
count(*) as count
FROM events e
WHERE team_id = 2
AND event = 'watched movie'
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-02 00:00:00', 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC')
GROUP BY value
ORDER BY count DESC, value DESC
LIMIT 25
OFFSET 0)
'
---
# name: TestBreakdowns.test_breakdown_numeric_hogql_to_string.1
'

SELECT groupArray(day_start) as date,
groupArray(count) AS total,
breakdown_value
FROM
(SELECT SUM(total) as count,
day_start,
breakdown_value
FROM
(SELECT *
FROM
(SELECT toUInt16(0) AS total,
ticks.day_start as day_start,
breakdown_value
FROM
(SELECT toStartOfDay(toDateTime('2020-01-12 23:59:59', 'UTC')) - toIntervalDay(number) as day_start
FROM numbers(11)
UNION ALL SELECT toStartOfDay(toDateTime('2020-01-02 00:00:00', 'UTC')) as day_start) as ticks
CROSS JOIN
(SELECT breakdown_value
FROM
(SELECT [1, 2] as breakdown_value) ARRAY
JOIN breakdown_value) as sec
ORDER BY breakdown_value,
day_start
UNION ALL SELECT count(*) as total,
toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start,
transform(length(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', '')), ([19, 20]), ([19, 20]), 9007199254740991) as breakdown_value
FROM events e
WHERE e.team_id = 2
AND event = 'watched movie'
AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-02 00:00:00', 'UTC')), 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC')
GROUP BY day_start,
breakdown_value))
GROUP BY day_start,
breakdown_value
ORDER BY breakdown_value,
day_start)
GROUP BY breakdown_value
ORDER BY breakdown_value
'
---
32 changes: 28 additions & 4 deletions posthog/queries/trends/test/test_breakdowns.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from posthog.constants import TRENDS_TABLE
from posthog.models import Filter
from posthog.queries.trends.breakdown import BREAKDOWN_OTHER_NUMERIC_LABEL
from posthog.queries.trends.trends import Trends
from posthog.test.base import (
APIBaseTest,
Expand Down Expand Up @@ -45,7 +46,7 @@ def setUp(self):
"properties": {
"$session_id": "2",
"movie_length": 75,
"$current_url": "https://example.com",
"$current_url": "https://example.com/",
},
},
],
Expand Down Expand Up @@ -117,7 +118,8 @@ def _run(self, extra: Dict = {}, events_extra: Dict = {}):
"date_from": "2020-01-02T00:00:00Z",
"date_to": "2020-01-12T00:00:00Z",
**extra,
}
},
team=self.team,
),
self.team,
)
Expand Down Expand Up @@ -458,8 +460,13 @@ def test_breakdown_by_event_property_with_entity_session_filter(self):
("", 6.0, [1.0, 0.0, 1.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
(
"https://example.com",
2.0,
[2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
1.0,
[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
),
(
"https://example.com/",
1.0,
[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
),
],
)
Expand All @@ -484,3 +491,20 @@ def test_breakdown_histogram_by_missing_property_regression(self):
),
],
)

@snapshot_clickhouse_queries
def test_breakdown_numeric_hogql_to_string(self):
response = self._run(
{
"breakdown": "length(properties.$current_url)",
"breakdown_type": "hogql",
},
)
self.assertEqual(
[(item["breakdown_value"], item["count"], item["data"]) for item in response],
[
(BREAKDOWN_OTHER_NUMERIC_LABEL, 6.0, [1.0, 1.0, 4.0]),
(19, 2.0, [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
(20, 1.0, [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
],
)

0 comments on commit 82d22cc

Please sign in to comment.