From c4776085314c2c3cd0dfa564a4d74be74586f027 Mon Sep 17 00:00:00 2001 From: timgl Date: Mon, 17 Jun 2024 13:47:24 +0100 Subject: [PATCH 1/3] perf: Fix breakdown query memory usage --- posthog/hogql/functions/mapping.py | 2 ++ .../insights/trends/trends_query_builder.py | 22 +++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/posthog/hogql/functions/mapping.py b/posthog/hogql/functions/mapping.py index d079b7878534c..1f2001b57945b 100644 --- a/posthog/hogql/functions/mapping.py +++ b/posthog/hogql/functions/mapping.py @@ -335,6 +335,8 @@ def compare_types(arg_types: list[ConstantType], sig_arg_types: tuple[ConstantTy "arrayAUC": HogQLFunctionMeta("arrayAUC", 2, 2), "arrayMap": HogQLFunctionMeta("arrayMap", 2, None), "arrayFill": HogQLFunctionMeta("arrayFill", 2, None), + "arrayFold": HogQLFunctionMeta("arrayFold", 3, None), + "arrayWithConstant": HogQLFunctionMeta("arrayWithConstant", 2, 2), "arraySplit": HogQLFunctionMeta("arraySplit", 2, None), "arrayReverseFill": HogQLFunctionMeta("arrayReverseFill", 2, None), "arrayReverseSplit": HogQLFunctionMeta("arrayReverseSplit", 2, None), diff --git a/posthog/hogql_queries/insights/trends/trends_query_builder.py b/posthog/hogql_queries/insights/trends/trends_query_builder.py index 75418ffba2a3e..d63bf1b0eec1a 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_builder.py +++ b/posthog/hogql_queries/insights/trends/trends_query_builder.py @@ -338,17 +338,25 @@ def _outer_select_query( # TODO: What happens with cohorts and this limit? if not breakdown.is_histogram_breakdown: + # arrayFold is basically arrayReduce (but you can pass your own lambda function) + # it takes result array from the outer query which looks like this (if they're grouped under "other" values): + # [ + # [0, 0, 1], + # [0, 1, 0] + # ] + # and turns it into + # [0, 1, 1] return parse_select( """ SELECT groupArray(1)(date)[1] as date, - arrayMap( - i -> - arraySum(arrayMap( - x -> arrayElement(x, i), - groupArray(total) - )), - arrayEnumerate(date) + arrayFold( + (acc, x) -> arrayMap( + i -> acc[i] + x[i], + range(1, length(date) + 1) + ), + groupArray(total), + arrayWithConstant(length(date), reinterpretAsInt64(0)) ) as total, if(row_number >= {breakdown_limit}, {other}, breakdown_value) as breakdown_value FROM {outer_query} From fbc9e1a7e61cd18213b376103a2b642dac74c852 Mon Sep 17 00:00:00 2001 From: timgl Date: Mon, 17 Jun 2024 14:02:20 +0100 Subject: [PATCH 2/3] fix type --- .../test/__snapshots__/test_trends.ambr | 36 +++++++++---------- .../test_trends_data_warehouse_query.ambr | 8 ++--- .../insights/trends/trends_query_builder.py | 2 +- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index f7cca83fa28a4..637be70bc7827 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -185,7 +185,7 @@ # name: TestTrends.test_breakdown_by_group_props_person_on_events ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -305,7 +305,7 @@ # name: TestTrends.test_breakdown_by_group_props_with_person_filter_person_on_events ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -393,7 +393,7 @@ # name: TestTrends.test_breakdown_filtering_with_properties_in_new_format ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -432,7 +432,7 @@ # name: TestTrends.test_breakdown_filtering_with_properties_in_new_format.1 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -709,7 +709,7 @@ # name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.2 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -855,7 +855,7 @@ # name: TestTrends.test_breakdown_with_filter_groups_person_on_events ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -952,7 +952,7 @@ # name: TestTrends.test_breakdown_with_filter_groups_person_on_events_v2.1 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -1054,7 +1054,7 @@ # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -1100,7 +1100,7 @@ # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.1 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -1496,7 +1496,7 @@ # name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -1638,7 +1638,7 @@ # name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter_poe_v2 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -3412,7 +3412,7 @@ # name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -3471,7 +3471,7 @@ # name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.1 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -3966,7 +3966,7 @@ # name: TestTrends.test_trends_breakdown_cumulative ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -4064,7 +4064,7 @@ # name: TestTrends.test_trends_breakdown_cumulative_poe_v2 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -4162,7 +4162,7 @@ # name: TestTrends.test_trends_breakdown_normalize_url ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -5021,7 +5021,7 @@ # name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0), toIntervalWeek(number)), range(0, plus(coalesce(dateDiff('week', toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')), 0))), 1))) AS date, @@ -5075,7 +5075,7 @@ # name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns.1 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr index 5b65891935f0c..057877739dda9 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr @@ -2,7 +2,7 @@ # name: TestTrendsDataWarehouseQuery.test_trends_breakdown ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -18,7 +18,7 @@ (SELECT count(e.id) AS total, toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start, ifNull(nullIf(toString(e.prop_1), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery_gw0/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), true) GROUP BY day_start, breakdown_value) @@ -74,7 +74,7 @@ # name: TestTrendsDataWarehouseQuery.test_trends_breakdown_with_property ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -90,7 +90,7 @@ (SELECT count(e.id) AS total, toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start, ifNull(nullIf(toString(e.prop_1), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery_gw0/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'), true) GROUP BY day_start, breakdown_value) diff --git a/posthog/hogql_queries/insights/trends/trends_query_builder.py b/posthog/hogql_queries/insights/trends/trends_query_builder.py index d63bf1b0eec1a..2bfc397da03c4 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_builder.py +++ b/posthog/hogql_queries/insights/trends/trends_query_builder.py @@ -356,7 +356,7 @@ def _outer_select_query( range(1, length(date) + 1) ), groupArray(total), - arrayWithConstant(length(date), reinterpretAsInt64(0)) + arrayWithConstant(length(date), reinterpretAsFloat64(0)) ) as total, if(row_number >= {breakdown_limit}, {other}, breakdown_value) as breakdown_value FROM {outer_query} From bef6fe461914850e602fb31a4b8a8dcdffb6ca51 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 17 Jun 2024 13:13:07 +0000 Subject: [PATCH 3/3] Update query snapshots --- .../trends/test/__snapshots__/test_trends.ambr | 14 +++++++------- .../test_trends_data_warehouse_query.ambr | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index 637be70bc7827..22187de2c8b0d 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -1802,7 +1802,7 @@ # name: TestTrends.test_person_filtering_in_cohort_in_action.2 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -1914,7 +1914,7 @@ # name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.2 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -2559,7 +2559,7 @@ # name: TestTrends.test_timezones_daily.4 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -2776,7 +2776,7 @@ # name: TestTrends.test_timezones_daily_minus_utc.4 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))))), 1))) AS date, @@ -2993,7 +2993,7 @@ # name: TestTrends.test_timezones_daily_plus_utc.4 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))))), 1))) AS date, @@ -3690,7 +3690,7 @@ # name: TestTrends.test_trends_aggregate_by_distinct_id.2 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -3825,7 +3825,7 @@ # name: TestTrends.test_trends_aggregate_by_distinct_id.5 ''' SELECT groupArray(1)(date)[1] AS date, - arrayMap(i -> arraySum(arrayMap(x -> arrayElement(x, i), groupArray(total))), arrayEnumerate(date)) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))))), 1))) AS date, diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr index 057877739dda9..4141c0043dea9 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr @@ -18,7 +18,7 @@ (SELECT count(e.id) AS total, toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start, ifNull(nullIf(toString(e.prop_1), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery_gw0/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), true) GROUP BY day_start, breakdown_value) @@ -90,7 +90,7 @@ (SELECT count(e.id) AS total, toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start, ifNull(nullIf(toString(e.prop_1), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery_gw0/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'), true) GROUP BY day_start, breakdown_value)