diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index 91d53547b1942..02c5905fc6118 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -188,7 +188,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -220,6 +220,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -293,25 +294,11 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_breakdown_by_group_props_person_on_events.3 - ''' - SELECT DISTINCT session_replay_events.session_id AS session_id - FROM session_replay_events - WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, [''])) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_breakdown_by_group_props_with_person_filter_person_on_events ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -343,6 +330,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -355,52 +343,11 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_breakdown_by_group_props_with_person_filter_person_on_events.1 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(e__group_0.properties___industry), ''), '$$_posthog_breakdown_null_$$'), ['finance'], ['finance'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'key'), ''), 'null'), '^"|"$', ''), 'value'), 0), true) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_breakdown_filtering_with_properties_in_new_format ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -424,6 +371,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -440,7 +388,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -464,6 +412,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -476,89 +425,51 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_breakdown_filtering_with_properties_in_new_format.2 - ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- -# name: TestTrends.test_breakdown_filtering_with_properties_in_new_format.3 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$'], ['$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0), true) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_breakdown_weekly_active_users_aggregated ''' - SELECT count(DISTINCT actor_id) AS total, - breakdown_value AS breakdown_value + SELECT sum(total) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 26), 0), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM - (SELECT d.timestamp AS timestamp, - e.actor_id AS actor_id, - e.breakdown_value AS breakdown_value + (SELECT total AS total, + breakdown_value AS breakdown_value, + row_number() OVER ( + ORDER BY total DESC) AS row_number FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp - FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))) AS numbers) AS d - CROSS JOIN - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id, - ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), true), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) - GROUP BY timestamp, actor_id, - breakdown_value) AS e - WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) - GROUP BY d.timestamp, - e.actor_id, - e.breakdown_value - ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + (SELECT count(DISTINCT actor_id) AS total, + breakdown_value AS breakdown_value + FROM + (SELECT d.timestamp AS timestamp, + e.actor_id AS actor_id, + e.breakdown_value AS breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), true), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id, + breakdown_value) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp, + e.actor_id, + e.breakdown_value + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + GROUP BY breakdown_value)) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, total DESC, + breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, @@ -569,81 +480,51 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_breakdown_weekly_active_users_aggregated.1 - ''' - SELECT count(DISTINCT actor_id) AS total, - breakdown_value AS breakdown_value - FROM - (SELECT d.timestamp AS timestamp, - e.actor_id AS actor_id, - e.breakdown_value AS breakdown_value - FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp - FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))) AS numbers) AS d - CROSS JOIN - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['val', 'bor'], ['val', 'bor'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), true), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) - GROUP BY timestamp, actor_id, - breakdown_value) AS e - WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) - GROUP BY d.timestamp, - e.actor_id, - e.breakdown_value - ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) - GROUP BY breakdown_value - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_breakdown_weekly_active_users_aggregated_materialized ''' - SELECT count(DISTINCT actor_id) AS total, - breakdown_value AS breakdown_value + SELECT sum(total) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 26), 0), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM - (SELECT d.timestamp AS timestamp, - e.actor_id AS actor_id, - e.breakdown_value AS breakdown_value + (SELECT total AS total, + breakdown_value AS breakdown_value, + row_number() OVER ( + ORDER BY total DESC) AS row_number FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp - FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))) AS numbers) AS d - CROSS JOIN - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id, - ifNull(nullIf(toString(nullIf(nullIf(e.mat_key, ''), 'null')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), true), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) - GROUP BY timestamp, actor_id, - breakdown_value) AS e - WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) - GROUP BY d.timestamp, - e.actor_id, - e.breakdown_value - ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + (SELECT count(DISTINCT actor_id) AS total, + breakdown_value AS breakdown_value + FROM + (SELECT d.timestamp AS timestamp, + e.actor_id AS actor_id, + e.breakdown_value AS breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS actor_id, + ifNull(nullIf(toString(nullIf(nullIf(e.mat_key, ''), 'null')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), true), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + GROUP BY timestamp, actor_id, + breakdown_value) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp, + e.actor_id, + e.breakdown_value + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) + GROUP BY breakdown_value)) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, total DESC, + breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, @@ -654,48 +535,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_breakdown_weekly_active_users_aggregated_materialized.1 - ''' - SELECT count(DISTINCT actor_id) AS total, - breakdown_value AS breakdown_value - FROM - (SELECT d.timestamp AS timestamp, - e.actor_id AS actor_id, - e.breakdown_value AS breakdown_value - FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp - FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))) AS numbers) AS d - CROSS JOIN - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id, - transform(ifNull(nullIf(toString(nullIf(nullIf(e.mat_key, ''), 'null')), ''), '$$_posthog_breakdown_null_$$'), ['val', 'bor'], ['val', 'bor'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), true), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) - GROUP BY timestamp, actor_id, - breakdown_value) AS e - WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) - GROUP BY d.timestamp, - e.actor_id, - e.breakdown_value - ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0)) - GROUP BY breakdown_value - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action ''' @@ -720,7 +559,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -781,6 +620,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -793,81 +633,11 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.3 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT counts AS total, - toStartOfDay(timestamp) AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT d.timestamp AS timestamp, - count(DISTINCT e.actor_id) AS counts, - e.breakdown_value AS breakdown_value - FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp - FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))) AS numbers) AS d - CROSS JOIN - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['val'], ['val'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), and(and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))), true), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) - GROUP BY timestamp, actor_id, - breakdown_value) AS e - WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) - GROUP BY d.timestamp, - e.breakdown_value - ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0))) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_breakdown_with_filter_groups_person_on_events ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -899,6 +669,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -911,47 +682,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_breakdown_with_filter_groups_person_on_events.1 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['uh', 'oh'], ['uh', 'oh'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), true) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_breakdown_with_filter_groups_person_on_events_v2 ''' @@ -965,7 +695,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -1004,6 +734,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -1016,59 +747,11 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_breakdown_with_filter_groups_person_on_events_v2.2 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['uh', 'oh'], ['uh', 'oh'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), true) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -1099,6 +782,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -1115,7 +799,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -1146,6 +830,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -1158,63 +843,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.2 - ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- -# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.3 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['other_value', '$$_posthog_breakdown_null_$$', 'value'], ['other_value', '$$_posthog_breakdown_null_$$', 'value'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1.0 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_filter_events_by_precalculated_cohort ''' @@ -1518,7 +1146,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -1577,6 +1205,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -1589,79 +1218,11 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter.1 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT counts AS total, - toStartOfDay(timestamp) AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT d.timestamp AS timestamp, - count(DISTINCT e.actor_id) AS counts, - e.breakdown_value AS breakdown_value - FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp - FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(30)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))) AS numbers) AS d - CROSS JOIN - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS actor_id, - transform(ifNull(nullIf(toString(e__pdi__person.`properties___$some_prop`), ''), '$$_posthog_breakdown_null_$$'), ['some_val2', 'some_val'], ['some_val2', 'some_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop`, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'filter_prop'), ''), 'null'), '^"|"$', '') AS properties___filter_prop - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(e__pdi__person.properties___filter_prop, 'filter_val'), 0), true), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY timestamp, actor_id, - breakdown_value) AS e - WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(29))), 0)) - GROUP BY d.timestamp, - e.breakdown_value - ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0))) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter_poe_v2 ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -1708,6 +1269,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -1720,62 +1282,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter_poe_v2.1 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT counts AS total, - toStartOfDay(timestamp) AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT d.timestamp AS timestamp, - count(DISTINCT e.actor_id) AS counts, - e.breakdown_value AS breakdown_value - FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp - FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(30)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))) AS numbers) AS d - CROSS JOIN - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS actor_id, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['some_val2', 'some_val'], ['some_val2', 'some_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'filter_prop'), ''), 'null'), '^"|"$', ''), 'filter_val'), 0), true), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY timestamp, actor_id, - breakdown_value) AS e - WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(29))), 0)) - GROUP BY d.timestamp, - e.breakdown_value - ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0))) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_non_deterministic_timezones ''' SELECT arrayMap(number -> plus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2022-10-31 00:00:00', 6, 'US/Pacific')), 0), toIntervalWeek(number)), range(0, plus(coalesce(dateDiff('week', toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2022-10-31 00:00:00', 6, 'US/Pacific')), 0), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2022-11-30 23:59:59', 6, 'US/Pacific')), 0))), 1))) AS date, @@ -1827,7 +1333,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -1851,69 +1357,27 @@ GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)), true) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY arraySum(total) DESC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestTrends.test_person_filtering_in_cohort_in_action.3 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)), true) + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)), true) + GROUP BY day_start, + breakdown_value) GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, format_csv_allow_double_quotes=0, max_ast_elements=1000000, max_expanded_ast_elements=1000000, - max_query_size=524288 + max_query_size=524288, + max_bytes_before_external_group_by=0 ''' # --- # name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2 @@ -1940,7 +1404,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -1974,6 +1438,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -1986,49 +1451,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.3 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)), true) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_person_property_filtering ''' SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -2601,7 +2023,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -2632,6 +2054,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -2644,46 +2067,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_timezones_daily.5 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['Mac'], ['Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_timezones_daily_minus_utc ''' SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))))), 1))) AS date, @@ -2823,7 +2206,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -2854,6 +2237,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -2866,46 +2250,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_timezones_daily_minus_utc.5 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['Mac'], ['Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'), true) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_timezones_daily_plus_utc ''' SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))))), 1))) AS date, @@ -3045,7 +2389,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -3076,6 +2420,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -3088,46 +2433,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_timezones_daily_plus_utc.5 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(DISTINCT e__pdi.person_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['Mac'], ['Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'), true) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_timezones_hourly_relative_from ''' SELECT arrayMap(number -> plus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC'))), toIntervalHour(number)), range(0, plus(coalesce(dateDiff('hour', toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 00:00:00', 6, 'UTC'))), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 10:59:59', 6, 'UTC'))))), 1))) AS date, @@ -3477,7 +2782,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -3521,6 +2826,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -3537,7 +2843,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -3581,6 +2887,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -3593,96 +2900,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.2 - ''' - SELECT toString(e__pdi__person.properties___email) AS value, - count(e.uuid) AS count - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), 0)))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- -# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.3 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(e.uuid) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(e__pdi__person.properties___email), ''), '$$_posthog_breakdown_null_$$'), ['test2@posthog.com'], ['test2@posthog.com'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser` - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), 0)), true) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_trends_aggregate_by_distinct_id ''' SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -3759,7 +2976,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -3801,6 +3018,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -3897,7 +3115,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -3921,6 +3139,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -3933,56 +3152,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_trends_aggregate_by_distinct_id.6 - ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', '')) AS value, - count(e.uuid) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- -# name: TestTrends.test_trends_aggregate_by_distinct_id.7 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(DISTINCT e.distinct_id) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$'], ['$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_trends_any_event_total_count ''' SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -4041,7 +3210,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -4078,6 +3247,7 @@ ORDER BY day_start ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -4090,57 +3260,11 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_trends_breakdown_cumulative.1 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT day_start AS day_start, - sum(count) OVER (PARTITION BY breakdown_value - ORDER BY day_start ASC) AS count, - breakdown_value AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(DISTINCT e__pdi.person_id) AS total, - min(toStartOfDay(toTimeZone(e.timestamp, 'UTC'))) AS day_start, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) - GROUP BY e__pdi.person_id, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - ORDER BY day_start ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_trends_breakdown_cumulative_poe_v2 ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -4174,72 +3298,27 @@ GROUP BY day_start, breakdown_value ORDER BY day_start ASC, breakdown_value ASC) - ORDER BY day_start ASC) - GROUP BY breakdown_value - ORDER BY arraySum(total) DESC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestTrends.test_trends_breakdown_cumulative_poe_v2.1 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT day_start AS day_start, - sum(count) OVER (PARTITION BY breakdown_value - ORDER BY day_start ASC) AS count, - breakdown_value AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, - min(toStartOfDay(toTimeZone(e.timestamp, 'UTC'))) AS day_start, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) - GROUP BY if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - ORDER BY day_start ASC) + ORDER BY day_start ASC) + GROUP BY breakdown_value + ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, format_csv_allow_double_quotes=0, max_ast_elements=1000000, max_expanded_ast_elements=1000000, - max_query_size=524288 + max_query_size=524288, + max_bytes_before_external_group_by=0 ''' # --- # name: TestTrends.test_trends_breakdown_normalize_url ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -4278,6 +3357,7 @@ ORDER BY day_start ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -4290,74 +3370,38 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_trends_breakdown_normalize_url.1 +# name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value + SELECT sum(total) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 26), 0), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM - (SELECT day_start AS day_start, - sum(count) OVER (PARTITION BY breakdown_value - ORDER BY day_start ASC) AS count, - breakdown_value AS breakdown_value + (SELECT total AS total, + breakdown_value AS breakdown_value, + row_number() OVER ( + ORDER BY total DESC) AS row_number FROM - (SELECT sum(total) AS count, - day_start AS day_start, + (SELECT quantile(0.5)(session_duration) AS total, breakdown_value AS breakdown_value FROM - (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, - min(toStartOfDay(toTimeZone(e.timestamp, 'UTC'))) AS day_start, - transform(ifNull(nullIf(toString(if(empty(trim(TRAILING '/?#' - FROM replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''))), '/', trim(TRAILING '/?#' - FROM replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')))), ''), '$$_posthog_breakdown_null_$$'), ['http://hogflix/first', 'http://hogflix/second'], ['http://hogflix/first', 'http://hogflix/second'], '$$_posthog_breakdown_other_$$') AS breakdown_value + (SELECT any(e__session.`$session_duration`) AS session_duration, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) - GROUP BY if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - ORDER BY day_start ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- -# name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown - ''' - SELECT quantile(0.5)(session_duration) AS total, - breakdown_value AS breakdown_value - FROM - (SELECT any(e__session.`$session_duration`) AS session_duration, - ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - LEFT JOIN - (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, - sessions.session_id AS session_id - FROM sessions - WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY sessions.session_id, - sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) - GROUP BY e.`$session_id`, - breakdown_value - ORDER BY 1 DESC, breakdown_value DESC) + LEFT JOIN + (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, + sessions.session_id AS session_id + FROM sessions + WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY sessions.session_id, + sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY e.`$session_id`, + breakdown_value + ORDER BY 1 DESC, breakdown_value DESC) + GROUP BY breakdown_value)) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, total DESC, + breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, @@ -4370,24 +3414,36 @@ # --- # name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown.1 ''' - SELECT quantile(0.5)(session_duration) AS total, - breakdown_value AS breakdown_value + SELECT sum(total) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 26), 0), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM - (SELECT any(e__session.`$session_duration`) AS session_duration, - ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - LEFT JOIN - (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, - sessions.session_id AS session_id - FROM sessions - WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY sessions.session_id, - sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) - GROUP BY e.`$session_id`, - breakdown_value - ORDER BY 1 DESC, breakdown_value DESC) + (SELECT total AS total, + breakdown_value AS breakdown_value, + row_number() OVER ( + ORDER BY total DESC) AS row_number + FROM + (SELECT quantile(0.5)(session_duration) AS total, + breakdown_value AS breakdown_value + FROM + (SELECT any(e__session.`$session_duration`) AS session_duration, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, + sessions.session_id AS session_id + FROM sessions + WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY sessions.session_id, + sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY e.`$session_id`, + breakdown_value + ORDER BY 1 DESC, breakdown_value DESC) + GROUP BY breakdown_value)) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, total DESC, + breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, @@ -4398,58 +3454,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown.2 - ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value, - max(e__session.`$session_duration`) AS count - FROM events AS e - LEFT JOIN - (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, - sessions.session_id AS session_id - FROM sessions - WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY sessions.session_id, - sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- -# name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown.3 - ''' - SELECT quantile(0.5)(session_duration) AS total, - breakdown_value AS breakdown_value - FROM - (SELECT any(e__session.`$session_duration`) AS session_duration, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['value2', 'value1', '$$_posthog_breakdown_null_$$'], ['value2', 'value1', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - LEFT JOIN - (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, - sessions.session_id AS session_id - FROM sessions - WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY sessions.session_id, - sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) - GROUP BY e.`$session_id`, - breakdown_value) - GROUP BY breakdown_value - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_trends_compare_day_interval_relative_range ''' SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -4587,26 +3591,38 @@ # --- # name: TestTrends.test_trends_count_per_user_average_aggregated_with_event_property_breakdown_with_sampling ''' - SELECT total AS total, - breakdown_value AS breakdown_value + SELECT sum(total) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 26), 0), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM - (SELECT avg(total) AS total, - breakdown_value AS breakdown_value + (SELECT total AS total, + breakdown_value AS breakdown_value, + row_number() OVER ( + ORDER BY total DESC) AS row_number FROM - (SELECT count(e.uuid) AS total, - ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM events AS e SAMPLE 1.0 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(equals(e.event, 'viewed video'), true), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) - GROUP BY e__pdi.person_id, - breakdown_value) - GROUP BY breakdown_value) + (SELECT total AS total, + breakdown_value AS breakdown_value + FROM + (SELECT avg(total) AS total, + breakdown_value AS breakdown_value + FROM + (SELECT count(e.uuid) AS total, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1.0 + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(equals(e.event, 'viewed video'), true), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) + GROUP BY e__pdi.person_id, + breakdown_value) + GROUP BY breakdown_value))) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, total DESC, + breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, @@ -4617,37 +3633,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_trends_count_per_user_average_aggregated_with_event_property_breakdown_with_sampling.1 - ''' - SELECT total AS total, - breakdown_value AS breakdown_value - FROM - (SELECT avg(total) AS total, - breakdown_value AS breakdown_value - FROM - (SELECT count(e.uuid) AS total, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['red', 'blue', '$$_posthog_breakdown_null_$$'], ['red', 'blue', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1.0 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(equals(e.event, 'viewed video'), true), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))) - GROUP BY e__pdi.person_id, - breakdown_value) - GROUP BY breakdown_value) - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_trends_count_per_user_average_daily ''' SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date, @@ -4864,42 +3849,54 @@ # --- # name: TestTrends.test_trends_person_breakdown_with_session_property_single_aggregate_math_and_breakdown ''' - SELECT quantile(0.5)(session_duration) AS total, - breakdown_value AS breakdown_value + SELECT sum(total) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 26), 0), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM - (SELECT any(e__session.`$session_duration`) AS session_duration, - ifNull(nullIf(toString(e__pdi__person.`properties___$some_prop`), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - LEFT JOIN - (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, - sessions.session_id AS session_id - FROM sessions - WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY sessions.session_id, - sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) - GROUP BY e.`$session_id`, - breakdown_value - ORDER BY 1 DESC, breakdown_value DESC) + (SELECT total AS total, + breakdown_value AS breakdown_value, + row_number() OVER ( + ORDER BY total DESC) AS row_number + FROM + (SELECT quantile(0.5)(session_duration) AS total, + breakdown_value AS breakdown_value + FROM + (SELECT any(e__session.`$session_duration`) AS session_duration, + ifNull(nullIf(toString(e__pdi__person.`properties___$some_prop`), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, + sessions.session_id AS session_id + FROM sessions + WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY sessions.session_id, + sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) + GROUP BY e.`$session_id`, + breakdown_value + ORDER BY 1 DESC, breakdown_value DESC) + GROUP BY breakdown_value)) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, total DESC, + breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, @@ -4910,52 +3907,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_trends_person_breakdown_with_session_property_single_aggregate_math_and_breakdown.1 - ''' - SELECT quantile(0.5)(session_duration) AS total, - breakdown_value AS breakdown_value - FROM - (SELECT any(e__session.`$session_duration`) AS session_duration, - transform(ifNull(nullIf(toString(e__pdi__person.`properties___$some_prop`), ''), '$$_posthog_breakdown_null_$$'), ['some_val', 'another_val'], ['some_val', 'another_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - LEFT JOIN - (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, - sessions.session_id AS session_id - FROM sessions - WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY sessions.session_id, - sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(person.created_at, person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) - GROUP BY e.`$session_id`, - breakdown_value) - GROUP BY breakdown_value - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_trends_with_hogql_math ''' SELECT arrayMap(number -> plus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0), toIntervalWeek(number)), range(0, plus(coalesce(dateDiff('week', toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')), 0))), 1))) AS date, @@ -5119,7 +4070,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0), toIntervalWeek(number)), range(0, plus(coalesce(dateDiff('week', toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')), 0))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -5158,6 +4109,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -5174,7 +4126,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -5213,6 +4165,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 50000 SETTINGS readonly=2, @@ -5225,78 +4178,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns.2 - ''' - SELECT toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')) AS value, - max(e__session.`$session_duration`) AS count - FROM events AS e - LEFT JOIN - (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, - sessions.session_id AS session_id - FROM sessions - WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY sessions.session_id, - sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- -# name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns.3 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT quantile(0.5)(session_duration) AS total, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT any(e__session.`$session_duration`) AS session_duration, - transform(ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$'), ['value2', 'value1'], ['value2', 'value1'], '$$_posthog_breakdown_other_$$') AS breakdown_value, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - LEFT JOIN - (SELECT dateDiff('second', min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`, - sessions.session_id AS session_id - FROM sessions - WHERE and(equals(sessions.team_id, 2), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, 'UTC'), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY sessions.session_id, - sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), true) - GROUP BY day_start, - e.`$session_id`, - breakdown_value, - day_start) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrends.test_weekly_active_users_aggregated_range_narrower_than_week ''' SELECT count(DISTINCT actor_id) AS total diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr index 09625ed9a847c..59d0c6f5c2781 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr @@ -3,7 +3,7 @@ ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -27,6 +27,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, @@ -39,44 +40,11 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrendsDataWarehouseQuery.test_trends_breakdown.1 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(e.id) AS total, - toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(e.prop_1), ''), '$$_posthog_breakdown_null_$$'), ['d', 'c', 'b', 'a'], ['d', 'c', 'b', 'a'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e - WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), true) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrendsDataWarehouseQuery.test_trends_breakdown_with_property ''' SELECT groupArray(1)(date)[1] AS date, arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + if(ifNull(greaterOrEquals(row_number, 25), 0), '$$_posthog_breakdown_other_$$', toString(breakdown_value)) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date, arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) @@ -100,6 +68,7 @@ ORDER BY day_start ASC, breakdown_value ASC) GROUP BY breakdown_value ORDER BY arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) GROUP BY breakdown_value ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC LIMIT 100 SETTINGS readonly=2, @@ -112,39 +81,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestTrendsDataWarehouseQuery.test_trends_breakdown_with_property.1 - ''' - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - ifNull(toString(breakdown_value), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(e.id) AS total, - toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start, - transform(ifNull(nullIf(toString(e.prop_1), ''), '$$_posthog_breakdown_null_$$'), ['a'], ['a'], '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`id` String, `prop_1` String, `prop_2` String, `created` DateTime64(3, \'UTC\')') AS e - WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'), true) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)), - arraySum(total) DESC, breakdown_value ASC - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288 - ''' -# --- # name: TestTrendsDataWarehouseQuery.test_trends_data_warehouse ''' SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))))), 1))) AS date, diff --git a/posthog/hogql_queries/insights/trends/test/test_trends.py b/posthog/hogql_queries/insights/trends/test/test_trends.py index c3838fa8fd63c..fa288cea532c2 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends.py @@ -1604,7 +1604,7 @@ def test_trends_breakdown_with_session_property_single_aggregate_math_and_breakd # empty has: 1 seconds self.assertEqual( [resp["breakdown_value"] for resp in daily_response], - ["value1", "value2", "$$_posthog_breakdown_null_$$"], + ["value2", "value1", "$$_posthog_breakdown_null_$$"], ) self.assertEqual(sorted([resp["aggregated_value"] for resp in daily_response]), sorted([12.5, 10, 1])) @@ -6499,9 +6499,9 @@ def test_breakdown_filtering_bar_chart_by_value(self): self.team, ) - self.assertEqual(response[0]["aggregated_value"], 2) + self.assertEqual(response[0]["aggregated_value"], 1) self.assertEqual(response[1]["aggregated_value"], 1) - self.assertEqual(response[2]["aggregated_value"], 1) # the events without breakdown value + self.assertEqual(response[2]["aggregated_value"], 2) # the events without breakdown value self.assertEqual(response[0]["days"], []) @also_test_with_materialized_columns(person_properties=["key", "key_2"], verify_no_jsonextract=False) @@ -7662,11 +7662,11 @@ def test_trends_count_per_user_average_aggregated_with_event_property_breakdown( ) assert len(daily_response) == 3 - assert daily_response[0]["breakdown_value"] == "blue" - assert daily_response[1]["breakdown_value"] == "red" + assert daily_response[0]["breakdown_value"] == "red" + assert daily_response[1]["breakdown_value"] == "blue" assert daily_response[2]["breakdown_value"] == "$$_posthog_breakdown_null_$$" - assert daily_response[0]["aggregated_value"] == 1.0 # blue - assert daily_response[1]["aggregated_value"] == 2.0 # red + assert daily_response[0]["aggregated_value"] == 2.0 # red + assert daily_response[1]["aggregated_value"] == 1.0 # blue assert daily_response[2]["aggregated_value"] == 1.0 # $$_posthog_breakdown_null_$$ @snapshot_clickhouse_queries @@ -7689,11 +7689,11 @@ def test_trends_count_per_user_average_aggregated_with_event_property_breakdown_ ) assert len(daily_response) == 3 - assert daily_response[0]["breakdown_value"] == "blue" - assert daily_response[1]["breakdown_value"] == "red" + assert daily_response[0]["breakdown_value"] == "red" + assert daily_response[1]["breakdown_value"] == "blue" assert daily_response[2]["breakdown_value"] == "$$_posthog_breakdown_null_$$" - assert daily_response[0]["aggregated_value"] == 1.0 # blue - assert daily_response[1]["aggregated_value"] == 2.0 # red + assert daily_response[0]["aggregated_value"] == 2.0 # red + assert daily_response[1]["aggregated_value"] == 1.0 # blue assert daily_response[2]["aggregated_value"] == 1.0 # $$_posthog_breakdown_null_$$ # TODO: Add support for avg_count by group indexes (see this Slack thread for more context: https://posthog.slack.com/archives/C0368RPHLQH/p1700484174374229) diff --git a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py index 5ac3c588fd970..627ee7f5a6ee9 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py @@ -1491,6 +1491,22 @@ def test_breakdown_values_limit(self): ) self.assertEqual(len(response.results), 11) + # Now hide other aggregation + response = self._run_trends_query( + "2020-01-09", + "2020-01-20", + IntervalType.DAY, + [EventsNode(event="$pageview")], + TrendsFilter(display=ChartDisplayType.ACTIONS_LINE_GRAPH), + BreakdownFilter( + breakdown="breakdown_value", + breakdown_type=BreakdownType.EVENT, + breakdown_limit=10, + breakdown_hide_other_aggregation=True, + ), + ) + self.assertEqual(len(response.results), 10) + response = self._run_trends_query( "2020-01-09", "2020-01-20", @@ -1502,6 +1518,34 @@ def test_breakdown_values_limit(self): ) self.assertEqual(len(response.results), 30) + # Test actions table - it shows total values + + response = self._run_trends_query( + "2020-01-09", + "2020-01-20", + IntervalType.DAY, + [EventsNode(event="$pageview")], + TrendsFilter(display=ChartDisplayType.ACTIONS_TABLE), + BreakdownFilter(breakdown="breakdown_value", breakdown_type=BreakdownType.EVENT, breakdown_limit=10), + ) + self.assertEqual(len(response.results), 11) + + # Now hide other aggregation + response = self._run_trends_query( + "2020-01-09", + "2020-01-20", + IntervalType.DAY, + [EventsNode(event="$pageview")], + TrendsFilter(display=ChartDisplayType.ACTIONS_TABLE), + BreakdownFilter( + breakdown="breakdown_value", + breakdown_type=BreakdownType.EVENT, + breakdown_limit=10, + breakdown_hide_other_aggregation=True, + ), + ) + self.assertEqual(len(response.results), 10) + def test_breakdown_values_unknown_property(self): # same as above test, just without creating the property definition for value in list(range(30)): @@ -2128,8 +2172,8 @@ def test_to_actors_query_options_bar_value(self): assert response.breakdown == [ BreakdownItem(label="Chrome", value="Chrome"), BreakdownItem(label="Firefox", value="Firefox"), - BreakdownItem(label="Safari", value="Safari"), BreakdownItem(label="Edge", value="Edge"), + BreakdownItem(label="Safari", value="Safari"), ] @patch("posthog.hogql.query.sync_execute", wraps=sync_execute) diff --git a/posthog/hogql_queries/insights/trends/trends_query_builder.py b/posthog/hogql_queries/insights/trends/trends_query_builder.py index 2bfc397da03c4..8ef3c97dd73e5 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_builder.py +++ b/posthog/hogql_queries/insights/trends/trends_query_builder.py @@ -61,7 +61,8 @@ def build_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: if self._trends_display.is_total_value(): events_query = self._get_events_subquery(False, is_actors_query=False, breakdown=breakdown) - return events_query + wrapper_query = self._get_wrapper_query(events_query, breakdown=breakdown) + return wrapper_query else: event_query = self._get_events_subquery(False, is_actors_query=False, breakdown=breakdown) @@ -70,6 +71,49 @@ def build_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: return full_query + def _get_breakdown_hide_others(self) -> bool: + return ( + self.query.breakdownFilter.breakdown_hide_other_aggregation or False + if self.query.breakdownFilter + else False + ) + + def _get_wrapper_query( + self, events_query: ast.SelectQuery, breakdown: Breakdown + ) -> ast.SelectQuery | ast.SelectUnionQuery: + if not breakdown.enabled: + return events_query + + return parse_select( + """ + SELECT + SUM(total) AS total, + if(ifNull(greaterOrEquals(row_number, {breakdown_limit}), 0), {other_label}, toString(breakdown_value)) AS breakdown_value + FROM + ( + SELECT + total, + breakdown_value, + row_number() OVER (ORDER BY total DESC) as row_number + FROM {events_query} + ) + WHERE breakdown_value IS NOT NULL + GROUP BY breakdown_value + ORDER BY + breakdown_value = {other_label} ? 2 : breakdown_value = {nil} ? 1 : 0, + total DESC, + breakdown_value ASC + """, + placeholders={ + "events_query": events_query, + "other_label": ast.Constant( + value=None if self._get_breakdown_hide_others() else BREAKDOWN_OTHER_STRING_LABEL + ), + "nil": ast.Constant(value=BREAKDOWN_NULL_STRING_LABEL), + "breakdown_limit": ast.Constant(value=self._get_breakdown_limit() + 1), + }, + ) + def _get_date_subqueries(self) -> ast.Expr: return parse_expr( """ @@ -358,24 +402,30 @@ def _outer_select_query( groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0)) ) as total, - if(row_number >= {breakdown_limit}, {other}, breakdown_value) as breakdown_value + if(row_number >= {breakdown_limit}, {other_label}, toString(breakdown_value)) as breakdown_value FROM {outer_query} + WHERE breakdown_value IS NOT NULL GROUP BY breakdown_value ORDER BY - breakdown_value = {other} ? 2 : breakdown_value = {nil} ? 1 : 0, + breakdown_value = {other_label} ? 2 : breakdown_value = {nil} ? 1 : 0, arraySum(total) DESC, breakdown_value ASC """, { "outer_query": query, "breakdown_limit": ast.Constant(value=self._get_breakdown_limit()), - "other": ast.Constant(value=BREAKDOWN_OTHER_STRING_LABEL), + "other_label": ast.Constant( + value=None if self._get_breakdown_hide_others() else BREAKDOWN_OTHER_STRING_LABEL + ), "nil": ast.Constant(value=BREAKDOWN_NULL_STRING_LABEL), }, ) return query def _get_breakdown_limit(self) -> int: + if self._trends_display.display_type == ChartDisplayType.WORLD_MAP: + return 250 + return ( self.query.breakdownFilter and self.query.breakdownFilter.breakdown_limit ) or get_breakdown_limit_for_context(self.limit_context) diff --git a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr index d5aceabdff1e3..435f4a02949d1 100644 --- a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr +++ b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr @@ -3371,7 +3371,8 @@ format_csv_allow_double_quotes=0, max_ast_elements=1000000, max_expanded_ast_elements=1000000, - max_query_size=524288 + max_query_size=524288, + max_bytes_before_external_group_by=0 ''' # --- # name: TestSessionRecordingsListFromFilters.test_operand_or_event_filters @@ -3408,7 +3409,8 @@ format_csv_allow_double_quotes=0, max_ast_elements=1000000, max_expanded_ast_elements=1000000, - max_query_size=524288 + max_query_size=524288, + max_bytes_before_external_group_by=0 ''' # --- # name: TestSessionRecordingsListFromFilters.test_operand_or_event_filters.1 @@ -3445,7 +3447,8 @@ format_csv_allow_double_quotes=0, max_ast_elements=1000000, max_expanded_ast_elements=1000000, - max_query_size=524288 + max_query_size=524288, + max_bytes_before_external_group_by=0 ''' # --- # name: TestSessionRecordingsListFromFilters.test_operand_or_filters @@ -3483,7 +3486,8 @@ format_csv_allow_double_quotes=0, max_ast_elements=1000000, max_expanded_ast_elements=1000000, - max_query_size=524288 + max_query_size=524288, + max_bytes_before_external_group_by=0 ''' # --- # name: TestSessionRecordingsListFromFilters.test_operand_or_filters.1 @@ -3521,7 +3525,8 @@ format_csv_allow_double_quotes=0, max_ast_elements=1000000, max_expanded_ast_elements=1000000, - max_query_size=524288 + max_query_size=524288, + max_bytes_before_external_group_by=0 ''' # --- # name: TestSessionRecordingsListFromFilters.test_operand_or_mandatory_filters @@ -3566,7 +3571,8 @@ format_csv_allow_double_quotes=0, max_ast_elements=1000000, max_expanded_ast_elements=1000000, - max_query_size=524288 + max_query_size=524288, + max_bytes_before_external_group_by=0 ''' # --- # name: TestSessionRecordingsListFromFilters.test_operand_or_mandatory_filters.1 @@ -3611,7 +3617,8 @@ format_csv_allow_double_quotes=0, max_ast_elements=1000000, max_expanded_ast_elements=1000000, - max_query_size=524288 + max_query_size=524288, + max_bytes_before_external_group_by=0 ''' # --- # name: TestSessionRecordingsListFromFilters.test_operand_or_mandatory_filters.2 @@ -3648,7 +3655,8 @@ format_csv_allow_double_quotes=0, max_ast_elements=1000000, max_expanded_ast_elements=1000000, - max_query_size=524288 + max_query_size=524288, + max_bytes_before_external_group_by=0 ''' # --- # name: TestSessionRecordingsListFromFilters.test_operand_or_mandatory_filters.3 @@ -3685,7 +3693,8 @@ format_csv_allow_double_quotes=0, max_ast_elements=1000000, max_expanded_ast_elements=1000000, - max_query_size=524288 + max_query_size=524288, + max_bytes_before_external_group_by=0 ''' # --- # name: TestSessionRecordingsListFromFilters.test_operand_or_person_filters @@ -3734,7 +3743,8 @@ format_csv_allow_double_quotes=0, max_ast_elements=1000000, max_expanded_ast_elements=1000000, - max_query_size=524288 + max_query_size=524288, + max_bytes_before_external_group_by=0 ''' # --- # name: TestSessionRecordingsListFromFilters.test_operand_or_person_filters.1