Skip to content

Commit

Permalink
fix(insights): Fix breakdown limit for total values (#23036)
Browse files Browse the repository at this point in the history
  • Loading branch information
webjunkie authored Jun 18, 2024
1 parent 9f0d9a8 commit 3f90216
Show file tree
Hide file tree
Showing 6 changed files with 411 additions and 1,490 deletions.
1,673 changes: 277 additions & 1,396 deletions posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
'''
SELECT groupArray(1)(date)[1] AS date,
arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total,
if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value
if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value
FROM
(SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date,
arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x)
Expand All @@ -27,6 +27,7 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY arraySum(total) DESC, breakdown_value ASC)
WHERE isNotNull(breakdown_value)
GROUP BY breakdown_value
ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC
LIMIT 100 SETTINGS readonly=2,
Expand All @@ -39,44 +40,11 @@
max_bytes_before_external_group_by=0
'''
# ---
# name: TestTrendsDataWarehouseQuery.test_trends_breakdown.1
'''
SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date,
arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x)
and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total,
ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
FROM
(SELECT sum(total) AS count,
day_start AS day_start,
breakdown_value AS breakdown_value
FROM
(SELECT count(e.id) AS total,
toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start,
transform(ifNull(nullIf(toString(e.prop_1), ''), '$$_posthog_breakdown_null_$$'), ['d', 'c', 'b', 'a'], ['d', 'c', 'b', 'a'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e
WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), true)
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
breakdown_value
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)),
arraySum(total) DESC, breakdown_value ASC
LIMIT 100 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=1000000,
max_expanded_ast_elements=1000000,
max_query_size=524288
'''
# ---
# name: TestTrendsDataWarehouseQuery.test_trends_breakdown_with_property
'''
SELECT groupArray(1)(date)[1] AS date,
arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total,
if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value
if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value
FROM
(SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date,
arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x)
Expand All @@ -100,6 +68,7 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY arraySum(total) DESC, breakdown_value ASC)
WHERE isNotNull(breakdown_value)
GROUP BY breakdown_value
ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC
LIMIT 100 SETTINGS readonly=2,
Expand All @@ -112,39 +81,6 @@
max_bytes_before_external_group_by=0
'''
# ---
# name: TestTrendsDataWarehouseQuery.test_trends_breakdown_with_property.1
'''
SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date,
arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x)
and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total,
ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value
FROM
(SELECT sum(total) AS count,
day_start AS day_start,
breakdown_value AS breakdown_value
FROM
(SELECT count(e.id) AS total,
toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start,
transform(ifNull(nullIf(toString(e.prop_1), ''), '$$_posthog_breakdown_null_$$'), ['a'], ['a'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e
WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'), true)
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
breakdown_value
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)),
arraySum(total) DESC, breakdown_value ASC
LIMIT 100 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=1000000,
max_expanded_ast_elements=1000000,
max_query_size=524288
'''
# ---
# name: TestTrendsDataWarehouseQuery.test_trends_data_warehouse
'''
SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date,
Expand Down
22 changes: 11 additions & 11 deletions posthog/hogql_queries/insights/trends/test/test_trends.py
Original file line number Diff line number Diff line change
Expand Up @@ -1604,7 +1604,7 @@ def test_trends_breakdown_with_session_property_single_aggregate_math_and_breakd
# empty has: 1 seconds
self.assertEqual(
[resp["breakdown_value"] for resp in daily_response],
["value1", "value2", "$$_posthog_breakdown_null_$$"],
["value2", "value1", "$$_posthog_breakdown_null_$$"],
)
self.assertEqual(sorted([resp["aggregated_value"] for resp in daily_response]), sorted([12.5, 10, 1]))

Expand Down Expand Up @@ -6499,9 +6499,9 @@ def test_breakdown_filtering_bar_chart_by_value(self):
self.team,
)

self.assertEqual(response[0]["aggregated_value"], 2)
self.assertEqual(response[0]["aggregated_value"], 1)
self.assertEqual(response[1]["aggregated_value"], 1)
self.assertEqual(response[2]["aggregated_value"], 1) # the events without breakdown value
self.assertEqual(response[2]["aggregated_value"], 2) # the events without breakdown value
self.assertEqual(response[0]["days"], [])

@also_test_with_materialized_columns(person_properties=["key", "key_2"], verify_no_jsonextract=False)
Expand Down Expand Up @@ -7662,11 +7662,11 @@ def test_trends_count_per_user_average_aggregated_with_event_property_breakdown(
)

assert len(daily_response) == 3
assert daily_response[0]["breakdown_value"] == "blue"
assert daily_response[1]["breakdown_value"] == "red"
assert daily_response[0]["breakdown_value"] == "red"
assert daily_response[1]["breakdown_value"] == "blue"
assert daily_response[2]["breakdown_value"] == "$$_posthog_breakdown_null_$$"
assert daily_response[0]["aggregated_value"] == 1.0 # blue
assert daily_response[1]["aggregated_value"] == 2.0 # red
assert daily_response[0]["aggregated_value"] == 2.0 # red
assert daily_response[1]["aggregated_value"] == 1.0 # blue
assert daily_response[2]["aggregated_value"] == 1.0 # $$_posthog_breakdown_null_$$

@snapshot_clickhouse_queries
Expand All @@ -7689,11 +7689,11 @@ def test_trends_count_per_user_average_aggregated_with_event_property_breakdown_
)

assert len(daily_response) == 3
assert daily_response[0]["breakdown_value"] == "blue"
assert daily_response[1]["breakdown_value"] == "red"
assert daily_response[0]["breakdown_value"] == "red"
assert daily_response[1]["breakdown_value"] == "blue"
assert daily_response[2]["breakdown_value"] == "$$_posthog_breakdown_null_$$"
assert daily_response[0]["aggregated_value"] == 1.0 # blue
assert daily_response[1]["aggregated_value"] == 2.0 # red
assert daily_response[0]["aggregated_value"] == 2.0 # red
assert daily_response[1]["aggregated_value"] == 1.0 # blue
assert daily_response[2]["aggregated_value"] == 1.0 # $$_posthog_breakdown_null_$$

# TODO: Add support for avg_count by group indexes (see this Slack thread for more context: https://posthog.slack.com/archives/C0368RPHLQH/p1700484174374229)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1491,6 +1491,22 @@ def test_breakdown_values_limit(self):
)
self.assertEqual(len(response.results), 11)

# Now hide other aggregation
response = self._run_trends_query(
"2020-01-09",
"2020-01-20",
IntervalType.DAY,
[EventsNode(event="$pageview")],
TrendsFilter(display=ChartDisplayType.ACTIONS_LINE_GRAPH),
BreakdownFilter(
breakdown="breakdown_value",
breakdown_type=BreakdownType.EVENT,
breakdown_limit=10,
breakdown_hide_other_aggregation=True,
),
)
self.assertEqual(len(response.results), 10)

response = self._run_trends_query(
"2020-01-09",
"2020-01-20",
Expand All @@ -1502,6 +1518,34 @@ def test_breakdown_values_limit(self):
)
self.assertEqual(len(response.results), 30)

# Test actions table - it shows total values

response = self._run_trends_query(
"2020-01-09",
"2020-01-20",
IntervalType.DAY,
[EventsNode(event="$pageview")],
TrendsFilter(display=ChartDisplayType.ACTIONS_TABLE),
BreakdownFilter(breakdown="breakdown_value", breakdown_type=BreakdownType.EVENT, breakdown_limit=10),
)
self.assertEqual(len(response.results), 11)

# Now hide other aggregation
response = self._run_trends_query(
"2020-01-09",
"2020-01-20",
IntervalType.DAY,
[EventsNode(event="$pageview")],
TrendsFilter(display=ChartDisplayType.ACTIONS_TABLE),
BreakdownFilter(
breakdown="breakdown_value",
breakdown_type=BreakdownType.EVENT,
breakdown_limit=10,
breakdown_hide_other_aggregation=True,
),
)
self.assertEqual(len(response.results), 10)

def test_breakdown_values_unknown_property(self):
# same as above test, just without creating the property definition
for value in list(range(30)):
Expand Down Expand Up @@ -2128,8 +2172,8 @@ def test_to_actors_query_options_bar_value(self):
assert response.breakdown == [
BreakdownItem(label="Chrome", value="Chrome"),
BreakdownItem(label="Firefox", value="Firefox"),
BreakdownItem(label="Safari", value="Safari"),
BreakdownItem(label="Edge", value="Edge"),
BreakdownItem(label="Safari", value="Safari"),
]

@patch("posthog.hogql.query.sync_execute", wraps=sync_execute)
Expand Down
58 changes: 54 additions & 4 deletions posthog/hogql_queries/insights/trends/trends_query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ def build_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:

if self._trends_display.is_total_value():
events_query = self._get_events_subquery(False, is_actors_query=False, breakdown=breakdown)
return events_query
wrapper_query = self._get_wrapper_query(events_query, breakdown=breakdown)
return wrapper_query
else:
event_query = self._get_events_subquery(False, is_actors_query=False, breakdown=breakdown)

Expand All @@ -70,6 +71,49 @@ def build_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:

return full_query

def _get_breakdown_hide_others(self) -> bool:
return (
self.query.breakdownFilter.breakdown_hide_other_aggregation or False
if self.query.breakdownFilter
else False
)

def _get_wrapper_query(
self, events_query: ast.SelectQuery, breakdown: Breakdown
) -> ast.SelectQuery | ast.SelectUnionQuery:
if not breakdown.enabled:
return events_query

return parse_select(
"""
SELECT
SUM(total) AS total,
if(ifNull(greaterOrEquals(row_number, {breakdown_limit}), 0), {other_label}, toString(breakdown_value)) AS breakdown_value
FROM
(
SELECT
total,
breakdown_value,
row_number() OVER (ORDER BY total DESC) as row_number
FROM {events_query}
)
WHERE breakdown_value IS NOT NULL
GROUP BY breakdown_value
ORDER BY
breakdown_value = {other_label} ? 2 : breakdown_value = {nil} ? 1 : 0,
total DESC,
breakdown_value ASC
""",
placeholders={
"events_query": events_query,
"other_label": ast.Constant(
value=None if self._get_breakdown_hide_others() else BREAKDOWN_OTHER_STRING_LABEL
),
"nil": ast.Constant(value=BREAKDOWN_NULL_STRING_LABEL),
"breakdown_limit": ast.Constant(value=self._get_breakdown_limit() + 1),
},
)

def _get_date_subqueries(self) -> ast.Expr:
return parse_expr(
"""
Expand Down Expand Up @@ -358,24 +402,30 @@ def _outer_select_query(
groupArray(total),
arrayWithConstant(length(date), reinterpretAsFloat64(0))
) as total,
if(row_number >= {breakdown_limit}, {other}, breakdown_value) as breakdown_value
if(row_number >= {breakdown_limit}, {other_label}, toString(breakdown_value)) as breakdown_value
FROM {outer_query}
WHERE breakdown_value IS NOT NULL
GROUP BY breakdown_value
ORDER BY
breakdown_value = {other} ? 2 : breakdown_value = {nil} ? 1 : 0,
breakdown_value = {other_label} ? 2 : breakdown_value = {nil} ? 1 : 0,
arraySum(total) DESC,
breakdown_value ASC
""",
{
"outer_query": query,
"breakdown_limit": ast.Constant(value=self._get_breakdown_limit()),
"other": ast.Constant(value=BREAKDOWN_OTHER_STRING_LABEL),
"other_label": ast.Constant(
value=None if self._get_breakdown_hide_others() else BREAKDOWN_OTHER_STRING_LABEL
),
"nil": ast.Constant(value=BREAKDOWN_NULL_STRING_LABEL),
},
)
return query

def _get_breakdown_limit(self) -> int:
if self._trends_display.display_type == ChartDisplayType.WORLD_MAP:
return 250

return (
self.query.breakdownFilter and self.query.breakdownFilter.breakdown_limit
) or get_breakdown_limit_for_context(self.limit_context)
Expand Down
Loading

0 comments on commit 3f90216

Please sign in to comment.