diff --git a/posthog/hogql_queries/insights/trends/aggregation_operations.py b/posthog/hogql_queries/insights/trends/aggregation_operations.py index efcf86db14bef..39e5b8c2b2494 100644 --- a/posthog/hogql_queries/insights/trends/aggregation_operations.py +++ b/posthog/hogql_queries/insights/trends/aggregation_operations.py @@ -138,7 +138,18 @@ def _math_func(self, method: str, override_chain: Optional[list[str | int]]) -> else: chain = ["properties", self.series.math_property] - return ast.Call(name=method, args=[ast.Field(chain=chain)]) + return ast.Call( + # Two caveats here: + # 1. We always parse/convert the value to a Float64, to make sure it's a number. This truncates precision + # of very large integers, but it's a tradeoff preventing queries failing with "Illegal type String" + # 2. We fall back to 0 when there's no data, which is not quite kosher for math functions other than sum + # (null would actually be more meaningful for e.g. min or max), but formulas aren't equipped to handle nulls + name="ifNull", + args=[ + ast.Call(name=method, args=[ast.Call(name="toFloat", args=[ast.Field(chain=chain)])]), + ast.Constant(value=0), + ], + ) def _math_quantile(self, percentile: float, override_chain: Optional[list[str | int]]) -> ast.Call: if self.series.math_property == "$session_duration": diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_formula.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_formula.ambr new file mode 100644 index 0000000000000..db90d36978361 --- /dev/null +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_formula.ambr @@ -0,0 +1,926 @@ +# serializer version: 1 +# name: TestFormula.test_aggregated_one_without_events + ''' + SELECT ifNull(sum(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + ORDER BY 1 DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_aggregated_one_without_events.1 + ''' + SELECT ifNull(sum(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'session not here'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session error')) + ORDER BY 1 DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_breakdown + ''' + SELECT groupArray(1)(date)[1] AS date, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + breakdown_value AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT ifNull(sum(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'location'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_breakdown.1 + ''' + SELECT groupArray(1)(date)[1] AS date, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + breakdown_value AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT ifNull(avg(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'location'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_breakdown_aggregated + ''' + SELECT sum(total) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 26), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT count AS total, + breakdown_value AS breakdown_value, + row_number() OVER ( + ORDER BY total DESC) AS row_number + FROM + (SELECT sum(total) AS count, + breakdown_value AS breakdown_value + FROM + (SELECT ifNull(sum(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'location'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY breakdown_value) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC) + ORDER BY total DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, total DESC, + breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_breakdown_aggregated.1 + ''' + SELECT sum(total) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 26), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT count AS total, + breakdown_value AS breakdown_value, + row_number() OVER ( + ORDER BY total DESC) AS row_number + FROM + (SELECT sum(total) AS count, + breakdown_value AS breakdown_value + FROM + (SELECT ifNull(avg(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'location'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY breakdown_value) + GROUP BY breakdown_value + ORDER BY breakdown_value ASC) + ORDER BY total DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, total DESC, + breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_breakdown_cohort + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = NULL + ''' +# --- +# name: TestFormula.test_breakdown_cohort.1 + ''' + /* cohort_calculation: */ + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ''' +# --- +# name: TestFormula.test_breakdown_cohort.2 + ''' + SELECT groupArray(1)(date)[1] AS date, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + breakdown_value AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT ifNull(sum(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + toString(0) AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_breakdown_cohort.3 + ''' + SELECT groupArray(1)(date)[1] AS date, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + breakdown_value AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT ifNull(avg(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + toString(0) AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_breakdown_cohort.4 + ''' + SELECT groupArray(1)(date)[1] AS date, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + breakdown_value AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT ifNull(sum(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + toString(999932324) AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 2) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start'), ifNull(in(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_breakdown_cohort.5 + ''' + SELECT groupArray(1)(date)[1] AS date, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + breakdown_value AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT ifNull(avg(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + toString(999932324) AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 2) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start'), ifNull(in(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_breakdown_hogql + ''' + SELECT groupArray(1)(date)[1] AS date, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + breakdown_value AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT ifNull(sum(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(concat(ifNull(toString(e__person.`properties___$some_prop`), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'location'), ''), 'null'), '^"|"$', '')), ''))), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 2) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_breakdown_hogql.1 + ''' + SELECT groupArray(1)(date)[1] AS date, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + breakdown_value AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT ifNull(avg(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(concat(ifNull(toString(e__person.`properties___$some_prop`), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'location'), ''), 'null'), '^"|"$', '')), ''))), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 2) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_breakdown_with_different_breakdown_values_per_series + ''' + SELECT groupArray(1)(date)[1] AS date, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + breakdown_value AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT ifNull(sum(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'location'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_breakdown_with_different_breakdown_values_per_series.1 + ''' + SELECT groupArray(1)(date)[1] AS date, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + breakdown_value AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT ifNull(sum(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'location'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session end')) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_formula_with_unique_sessions + ''' + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total + FROM + (SELECT sum(total) AS count, + day_start AS day_start + FROM + (SELECT count(DISTINCT e.`$session_id`) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_formula_with_unique_sessions.1 + ''' + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total + FROM + (SELECT sum(total) AS count, + day_start AS day_start + FROM + (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 2) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_hour_interval_day_level_relative + ''' + SELECT arrayMap(number -> plus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 00:00:00', 6, 'UTC'))), toIntervalHour(number)), range(0, plus(coalesce(dateDiff('hour', toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 00:00:00', 6, 'UTC'))), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 13:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total + FROM + (SELECT sum(total) AS count, + day_start AS day_start + FROM + (SELECT ifNull(sum(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + toStartOfHour(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 13:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_hour_interval_day_level_relative.1 + ''' + SELECT arrayMap(number -> plus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 00:00:00', 6, 'UTC'))), toIntervalHour(number)), range(0, plus(coalesce(dateDiff('hour', toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 00:00:00', 6, 'UTC'))), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 13:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total + FROM + (SELECT sum(total) AS count, + day_start AS day_start + FROM + (SELECT ifNull(avg(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + toStartOfHour(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 00:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 13:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_hour_interval_hour_level_relative + ''' + SELECT arrayMap(number -> plus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 13:00:00', 6, 'UTC'))), toIntervalHour(number)), range(0, plus(coalesce(dateDiff('hour', toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 13:00:00', 6, 'UTC'))), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 13:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total + FROM + (SELECT sum(total) AS count, + day_start AS day_start + FROM + (SELECT ifNull(sum(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + toStartOfHour(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 13:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 13:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_hour_interval_hour_level_relative.1 + ''' + SELECT arrayMap(number -> plus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 13:00:00', 6, 'UTC'))), toIntervalHour(number)), range(0, plus(coalesce(dateDiff('hour', toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 13:00:00', 6, 'UTC'))), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 13:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total + FROM + (SELECT sum(total) AS count, + day_start AS day_start + FROM + (SELECT ifNull(avg(accurateCastOrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'xyz'), ''), 'null'), '^"|"$', ''), 'Float64')), 0) AS total, + toStartOfHour(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 13:00:00', 6, 'UTC'))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-03 13:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_regression_formula_with_session_duration_aggregation + ''' + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total + FROM + (SELECT sum(total) AS count, + day_start AS day_start + FROM + (SELECT ifNull(avg(accurateCastOrNull(session_duration, 'Float64')), 0) AS total, + day_start AS day_start + FROM + (SELECT any(e__session.`$session_duration`) AS session_duration, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT dateDiff('second', min(toTimeZone(raw_sessions.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions.max_timestamp, 'UTC'))) AS `$session_duration`, + raw_sessions.session_id_v7 AS session_id_v7 + FROM raw_sessions + WHERE and(equals(raw_sessions.team_id, 2), ifNull(greaterOrEquals(plus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY raw_sessions.session_id_v7, + raw_sessions.session_id_v7) AS e__session ON equals(toUInt128(accurateCastOrNull(e.`$session_id`, 'UUID')), e__session.session_id_v7) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start, + e.`$session_id`, + day_start) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_regression_formula_with_session_duration_aggregation.1 + ''' + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total + FROM + (SELECT sum(total) AS count, + day_start AS day_start + FROM + (SELECT count() AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session end')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_regression_formula_with_unique_sessions_2x_and_duration_filter + ''' + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total + FROM + (SELECT sum(total) AS count, + day_start AS day_start + FROM + (SELECT count(DISTINCT e.`$session_id`) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT dateDiff('second', min(toTimeZone(raw_sessions.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions.max_timestamp, 'UTC'))) AS `$session_duration`, + raw_sessions.session_id_v7 AS session_id_v7 + FROM raw_sessions + WHERE and(equals(raw_sessions.team_id, 2), ifNull(greaterOrEquals(plus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY raw_sessions.session_id_v7, + raw_sessions.session_id_v7) AS e__session ON equals(toUInt128(accurateCastOrNull(e.`$session_id`, 'UUID')), e__session.session_id_v7) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start'), ifNull(greater(e__session.`$session_duration`, 12.0), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_regression_formula_with_unique_sessions_2x_and_duration_filter.1 + ''' + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total + FROM + (SELECT sum(total) AS count, + day_start AS day_start + FROM + (SELECT count(DISTINCT e.`$session_id`) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_regression_formula_with_unique_sessions_2x_and_duration_filter_2x + ''' + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total + FROM + (SELECT sum(total) AS count, + day_start AS day_start + FROM + (SELECT count(DISTINCT e.`$session_id`) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT dateDiff('second', min(toTimeZone(raw_sessions.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions.max_timestamp, 'UTC'))) AS `$session_duration`, + raw_sessions.session_id_v7 AS session_id_v7 + FROM raw_sessions + WHERE and(equals(raw_sessions.team_id, 2), ifNull(greaterOrEquals(plus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY raw_sessions.session_id_v7, + raw_sessions.session_id_v7) AS e__session ON equals(toUInt128(accurateCastOrNull(e.`$session_id`, 'UUID')), e__session.session_id_v7) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, '$autocapture'), ifNull(less(e__session.`$session_duration`, 30.0), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestFormula.test_regression_formula_with_unique_sessions_2x_and_duration_filter_2x.1 + ''' + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total + FROM + (SELECT sum(total) AS count, + day_start AS day_start + FROM + (SELECT count(DISTINCT e.`$session_id`) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT dateDiff('second', min(toTimeZone(raw_sessions.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions.max_timestamp, 'UTC'))) AS `$session_duration`, + raw_sessions.session_id_v7 AS session_id_v7 + FROM raw_sessions + WHERE and(equals(raw_sessions.team_id, 2), ifNull(greaterOrEquals(plus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY raw_sessions.session_id_v7, + raw_sessions.session_id_v7) AS e__session ON equals(toUInt128(accurateCastOrNull(e.`$session_id`, 'UUID')), e__session.session_id_v7) + WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'session start'), ifNull(greater(e__session.`$session_duration`, 500.0), 0)) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index 7ab2824a23f68..6027f7ca7bb42 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -851,49 +851,14 @@ # --- # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.1 ''' - SELECT groupArray(1)(date)[1] AS date, - arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value - FROM - (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - breakdown_value AS breakdown_value, - rowNumberInAllBlocks() AS row_number - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM events AS e SAMPLE 1.0 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) - WHERE isNotNull(breakdown_value) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 + /* celery:posthog.tasks.tasks.sync_insight_caching_state */ + SELECT team_id, + date_diff('second', max(timestamp), now()) AS age + FROM events + WHERE timestamp > date_sub(DAY, 3, now()) + AND timestamp < now() + GROUP BY team_id + ORDER BY age; ''' # --- # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.10 @@ -923,12 +888,12 @@ # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.12 ''' SELECT groupArray(1)(date)[1] AS date, - arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, breakdown_value AS breakdown_value, rowNumberInAllBlocks() AS row_number FROM @@ -970,12 +935,12 @@ # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.13 ''' SELECT groupArray(1)(date)[1] AS date, - arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, breakdown_value AS breakdown_value, rowNumberInAllBlocks() AS row_number FROM @@ -1017,12 +982,12 @@ # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.14 ''' SELECT groupArray(1)(date)[1] AS date, - arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, arrayMap(i -> if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', i), breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, breakdown_value AS breakdown_value, rowNumberInAllBlocks() AS row_number FROM @@ -1064,12 +1029,12 @@ # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.15 ''' SELECT groupArray(1)(date)[1] AS date, - arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(total), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, arrayMap(i -> if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', i), breakdown_value) AS breakdown_value FROM (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(count), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, breakdown_value AS breakdown_value, rowNumberInAllBlocks() AS row_number FROM @@ -1110,143 +1075,38 @@ # --- # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.2 ''' - SELECT groupArray(1)(date)[1] AS date, - arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value - FROM - (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - breakdown_value AS breakdown_value, - rowNumberInAllBlocks() AS row_number - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM events AS e SAMPLE 1.0 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) - WHERE isNotNull(breakdown_value) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 + /* celery:posthog.tasks.tasks.sync_insight_caching_state */ + SELECT team_id, + date_diff('second', max(timestamp), now()) AS age + FROM events + WHERE timestamp > date_sub(DAY, 3, now()) + AND timestamp < now() + GROUP BY team_id + ORDER BY age; ''' # --- # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.3 ''' - SELECT groupArray(1)(date)[1] AS date, - arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - arrayMap(i -> if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', i), breakdown_value) AS breakdown_value - FROM - (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - breakdown_value AS breakdown_value, - rowNumberInAllBlocks() AS row_number - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - [ifNull(toString(breakdown_value_1), '$$_posthog_breakdown_null_$$')] AS breakdown_value - FROM - (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value_1 - FROM events AS e SAMPLE 1.0 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY day_start, - breakdown_value_1) - GROUP BY day_start, - breakdown_value_1 - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(has(breakdown_value, '$$_posthog_breakdown_other_$$'), 2, if(has(breakdown_value, '$$_posthog_breakdown_null_$$'), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) - WHERE arrayExists(x -> isNotNull(x), breakdown_value) - GROUP BY breakdown_value - ORDER BY if(has(breakdown_value, '$$_posthog_breakdown_other_$$'), 2, if(has(breakdown_value, '$$_posthog_breakdown_null_$$'), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 + /* celery:posthog.tasks.tasks.sync_insight_caching_state */ + SELECT team_id, + date_diff('second', max(timestamp), now()) AS age + FROM events + WHERE timestamp > date_sub(DAY, 3, now()) + AND timestamp < now() + GROUP BY team_id + ORDER BY age; ''' # --- # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.4 ''' - SELECT groupArray(1)(date)[1] AS date, - arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - arrayMap(i -> if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', i), breakdown_value) AS breakdown_value - FROM - (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total, - breakdown_value AS breakdown_value, - rowNumberInAllBlocks() AS row_number - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - [ifNull(toString(breakdown_value_1), '$$_posthog_breakdown_null_$$')] AS breakdown_value - FROM - (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value_1 - FROM events AS e SAMPLE 1.0 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')) - GROUP BY day_start, - breakdown_value_1) - GROUP BY day_start, - breakdown_value_1 - ORDER BY day_start ASC, breakdown_value ASC) - GROUP BY breakdown_value - ORDER BY if(has(breakdown_value, '$$_posthog_breakdown_other_$$'), 2, if(has(breakdown_value, '$$_posthog_breakdown_null_$$'), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) - WHERE arrayExists(x -> isNotNull(x), breakdown_value) - GROUP BY breakdown_value - ORDER BY if(has(breakdown_value, '$$_posthog_breakdown_other_$$'), 2, if(has(breakdown_value, '$$_posthog_breakdown_null_$$'), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 + /* celery:posthog.tasks.tasks.sync_insight_caching_state */ + SELECT team_id, + date_diff('second', max(timestamp), now()) AS age + FROM events + WHERE timestamp > date_sub(DAY, 3, now()) + AND timestamp < now() + GROUP BY team_id + ORDER BY age; ''' # --- # name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.5 diff --git a/posthog/queries/trends/test/test_formula.py b/posthog/hogql_queries/insights/trends/test/test_formula.py similarity index 55% rename from posthog/queries/trends/test/test_formula.py rename to posthog/hogql_queries/insights/trends/test/test_formula.py index d711bbff6f827..a1579a30582a5 100644 --- a/posthog/queries/trends/test/test_formula.py +++ b/posthog/hogql_queries/insights/trends/test/test_formula.py @@ -1,27 +1,39 @@ from typing import Optional +from unittest import mock -from freezegun.api import freeze_time +from django.test import override_settings from posthog.constants import TRENDS_CUMULATIVE, TRENDS_PIE, TRENDS_BOLD_NUMBER -from posthog.models import Cohort, Person -from posthog.models.filters.filter import Filter +from posthog.models import Cohort from posthog.models.group.util import create_group -from posthog.queries.trends.trends import Trends +from posthog.models.utils import uuid7 from posthog.test.base import ( APIBaseTest, ClickhouseTestMixin, _create_event, + _create_person, + flush_persons_and_events, snapshot_clickhouse_queries, ) +from freezegun import freeze_time +from posthog.hogql_queries.insights.trends.trends_query_runner import TrendsQueryRunner +from posthog.schema import ( + TrendsFilter, + TrendsQuery, +) + + +@override_settings(IN_UNIT_TESTING=True) class TestFormula(ClickhouseTestMixin, APIBaseTest): CLASS_DATA_LEVEL_SETUP = False + maxDiff = None def setUp(self): super().setUp() - Person.objects.create( + _create_person( team_id=self.team.pk, distinct_ids=["blabla", "anonymous_id"], properties={"$some_prop": "some_val"}, @@ -34,16 +46,17 @@ def setUp(self): properties={"industry": "finance"}, ) + s1 = str(uuid7("2020-01-02T13:01:01Z", 1)) with freeze_time("2020-01-02T13:01:01Z"): _create_event( team=self.team, event="session start", distinct_id="blabla", properties={ - "session duration": 200, + "xyz": 200, "location": "Paris", "$current_url": "http://example.org", - "$session_id": "1", + "$session_id": s1, "$group_0": "org:5", }, ) @@ -52,9 +65,9 @@ def setUp(self): event="session start", distinct_id="blabla", properties={ - "session duration": 300, + "xyz": 300, "location": "Paris", - "$session_id": "1", + "$session_id": s1, "$group_0": "org:5", }, ) @@ -63,9 +76,9 @@ def setUp(self): event="session start", distinct_id="blabla", properties={ - "session duration": 400, + "xyz": 400, "location": "London", - "$session_id": "1", + "$session_id": s1, "$group_0": "org:5", }, ) @@ -75,9 +88,9 @@ def setUp(self): event="session start", distinct_id="blabla", properties={ - "session duration": 400, + "xyz": 400, "location": "London", - "$session_id": "1", + "$session_id": s1, "$group_0": "org:5", }, ) @@ -87,9 +100,9 @@ def setUp(self): event="session start", distinct_id="blabla", properties={ - "session duration": 500, + "xyz": 500, "location": "London", - "$session_id": "1", + "$session_id": s1, "$group_0": "org:5", }, ) @@ -98,9 +111,9 @@ def setUp(self): event="session end", distinct_id="blabla", properties={ - "session duration": 500, + "xyz": 500, "location": "London", - "$session_id": "1", + "$session_id": s1, "$group_0": "org:5", }, ) @@ -110,9 +123,9 @@ def setUp(self): event="session end", distinct_id="blabla", properties={ - "session duration": 500, + "xyz": 500, "location": "Belo Horizonte", - "$session_id": "1", + "$session_id": s1, "$group_0": "org:5", }, ) @@ -122,44 +135,42 @@ def setUp(self): event="session end", distinct_id="blabla", properties={ - "session duration": 400, + "xyz": 400, "location": "", - "$session_id": "1", + "$session_id": s1, "$group_0": "org:5", }, ) def _run(self, extra: Optional[dict] = None, run_at: Optional[str] = None): - if extra is None: - extra = {} + flush_persons_and_events() + query_dict = { + "series": [ + { + "event": "session start", + "math": "sum", + "math_property": "xyz", + }, + { + "event": "session start", + "math": "avg", + "math_property": "xyz", + }, + ], + "trendsFilter": TrendsFilter(formula="A + B"), + } + if extra: + query_dict.update(extra) with freeze_time(run_at or "2020-01-04T13:01:01Z"): - action_response = Trends().run( - Filter( - data={ - "events": [ - { - "id": "session start", - "math": "sum", - "math_property": "session duration", - }, - { - "id": "session start", - "math": "avg", - "math_property": "session duration", - }, - ], - "formula": "A + B", - **extra, - }, - team=self.team, - ), - self.team, - ) - return action_response + trend_query = TrendsQuery(**query_dict) + tqr = TrendsQueryRunner(team=self.team, query=trend_query) + return tqr.calculate().results @snapshot_clickhouse_queries def test_hour_interval_hour_level_relative(self): - data = self._run({"date_from": "-24h", "interval": "hour"}, run_at="2020-01-03T13:05:01Z")[0]["data"] + data = self._run({"dateRange": {"date_from": "-24h"}, "interval": "hour"}, run_at="2020-01-03T13:05:01Z")[0][ + "data" + ] self.assertEqual( data, [ @@ -193,7 +204,9 @@ def test_hour_interval_hour_level_relative(self): @snapshot_clickhouse_queries def test_hour_interval_day_level_relative(self): - data = self._run({"date_from": "-1d", "interval": "hour"}, run_at="2020-01-03T13:05:01Z")[0]["data"] + data = self._run({"dateRange": {"date_from": "-1d"}, "interval": "hour"}, run_at="2020-01-03T13:05:01Z")[0][ + "data" + ] self.assertEqual( data, [ @@ -239,84 +252,86 @@ def test_hour_interval_day_level_relative(self): ) def test_day_interval(self): - data = self._run({"date_from": "-3d"}, run_at="2020-01-03T13:05:01Z")[0]["data"] + data = self._run({"dateRange": {"date_from": "-3d"}}, run_at="2020-01-03T13:05:01Z")[0]["data"] self.assertEqual(data, [0.0, 0.0, 1200.0, 1350.0]) def test_week_interval(self): - data = self._run({"date_from": "-2w", "interval": "week"}, run_at="2020-01-03T13:05:01Z")[0]["data"] + data = self._run({"dateRange": {"date_from": "-2w"}, "interval": "week"}, run_at="2020-01-03T13:05:01Z")[0][ + "data" + ] self.assertEqual(data, [0.0, 0.0, 2160.0]) def test_month_interval(self): - data = self._run({"date_from": "-2m", "interval": "month"}, run_at="2020-01-03T13:05:01Z")[0]["data"] + data = self._run({"dateRange": {"date_from": "-2m"}, "interval": "month"}, run_at="2020-01-03T13:05:01Z")[0][ + "data" + ] self.assertEqual(data, [0.0, 0.0, 2160.0]) def test_formula(self): self.assertEqual( - self._run({"formula": "A - B"})[0]["data"], + self._run({"trendsFilter": {"formula": "A - B"}})[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 600.0, 450.0, 0.0], ) self.assertEqual( - self._run({"formula": "A * B"})[0]["data"], + self._run({"trendsFilter": {"formula": "A * B"}})[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 270000.0, 405000.0, 0.0], ) self.assertEqual( - self._run({"formula": "A / B"})[0]["data"], + self._run({"trendsFilter": {"formula": "A / B"}})[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 2.0, 0.0], ) self.assertEqual( - self._run({"formula": "(A/3600)/B"})[0]["data"], + self._run({"trendsFilter": {"formula": "(A/3600)/B"}})[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 1 / 1200, 1 / 1800, 0.0], ) - self.assertEqual(self._run({"formula": "(A/3600)/B"})[0]["count"], 1 / 720) + self.assertEqual(self._run({"trendsFilter": {"formula": "(A/3600)/B"}})[0]["count"], 1 / 720) self.assertEqual( - self._run({"formula": "A/0"})[0]["data"], + self._run({"trendsFilter": {"formula": "A/0"}})[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ) - self.assertEqual(self._run({"formula": "A/0"})[0]["count"], 0) + self.assertEqual(self._run({"trendsFilter": {"formula": "A/0"}})[0]["count"], 0) @snapshot_clickhouse_queries def test_formula_with_unique_sessions(self): with freeze_time("2020-01-04T13:01:01Z"): - action_response = Trends().run( - Filter( - data={ - "events": [ - {"id": "session start", "math": "unique_session"}, - {"id": "session start", "math": "dau"}, - ], + action_response = self._run( + { + "series": [ + {"event": "session start", "math": "unique_session"}, + {"event": "session start", "math": "dau"}, + ], + "trendsFilter": { "formula": "A / B", - } - ), - self.team, + }, + } ) self.assertEqual(action_response[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0]) @snapshot_clickhouse_queries def test_regression_formula_with_unique_sessions_2x_and_duration_filter(self): with freeze_time("2020-01-04T13:01:01Z"): - action_response = Trends().run( - Filter( - data={ - "events": [ - { - "id": "session start", - "math": "unique_session", - "properties": [ - { - "key": "$session_duration", - "value": 12, - "operator": "gt", - "type": "session", - } - ], - }, - {"id": "session start", "math": "unique_session"}, - ], + action_response = self._run( + { + "series": [ + { + "event": "session start", + "math": "unique_session", + "properties": [ + { + "key": "$session_duration", + "value": 12, + "operator": "gt", + "type": "session", + } + ], + }, + {"event": "session start", "math": "unique_session"}, + ], + "trendsFilter": { "formula": "A / B", - } - ), - self.team, + }, + } ) self.assertEqual(action_response[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0]) @@ -324,39 +339,38 @@ def test_regression_formula_with_unique_sessions_2x_and_duration_filter(self): @snapshot_clickhouse_queries def test_regression_formula_with_unique_sessions_2x_and_duration_filter_2x(self): with freeze_time("2020-01-04T13:01:01Z"): - action_response = Trends().run( - Filter( - data={ - "events": [ - { - "id": "$autocapture", - "math": "unique_session", - "properties": [ - { - "key": "$session_duration", - "type": "session", - "value": 30, - "operator": "lt", - } - ], - }, - { - "id": "session start", - "math": "unique_session", - "properties": [ - { - "key": "$session_duration", - "type": "session", - "value": 500, - "operator": "gt", - } - ], - }, - ], + action_response = self._run( + { + "series": [ + { + "event": "$autocapture", + "math": "unique_session", + "properties": [ + { + "key": "$session_duration", + "type": "session", + "value": 30, + "operator": "lt", + } + ], + }, + { + "event": "session start", + "math": "unique_session", + "properties": [ + { + "key": "$session_duration", + "type": "session", + "value": 500, + "operator": "gt", + } + ], + }, + ], + "trendsFilter": { "formula": "B", - } - ), - self.team, + }, + } ) self.assertEqual(action_response[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0]) @@ -364,30 +378,25 @@ def test_regression_formula_with_unique_sessions_2x_and_duration_filter_2x(self) @snapshot_clickhouse_queries def test_regression_formula_with_session_duration_aggregation(self): with freeze_time("2020-01-04T13:01:01Z"): - action_response = Trends().run( - Filter( - data={ - "events": [ - { - "type": "events", - "id": "session start", - "order": 0, - "name": "$pageview", - "math": "avg", - "math_property": "$session_duration", - }, - { - "type": "events", - "id": "session end", - "order": 1, - "name": "$pageview", - "math": "total", - }, - ], + action_response = self._run( + { + "series": [ + { + "event": "session start", + "name": "$pageview", + "math": "avg", + "math_property": "$session_duration", + }, + { + "event": "session end", + "name": "$pageview", + "math": "total", + }, + ], + "trendsFilter": { "formula": "A / B", - } - ), - self.team, + }, + } ) self.assertEqual(action_response[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 28860.0, 0.0]) @@ -395,33 +404,27 @@ def test_regression_formula_with_session_duration_aggregation(self): @snapshot_clickhouse_queries def test_aggregated_one_without_events(self): with freeze_time("2020-01-04T13:01:01Z"): - response = Trends().run( - Filter( - data={ - "insight": "TRENDS", + response = self._run( + { + "trendsFilter": { "display": TRENDS_BOLD_NUMBER, "formula": "B + A", - "events": [ - { - "id": "session start", - "name": "session start", - "type": "events", - "order": 0, - "math": "sum", - "math_property": "session duration", - }, - { - "id": "session error", - "name": "session error", - "type": "events", - "order": 1, - "math": "sum", - "math_property": "session not here", - }, - ], - } - ), - self.team, + }, + "series": [ + { + "event": "session start", + "name": "session start", + "math": "sum", + "math_property": "xyz", + }, + { + "event": "session error", + "name": "session error", + "math": "sum", + "math_property": "session not here", + }, + ], + } ) self.assertEqual(response[0]["aggregated_value"], 1800) @@ -429,84 +432,90 @@ def test_aggregated_one_without_events(self): @snapshot_clickhouse_queries def test_breakdown(self): - response = self._run({"formula": "A - B", "breakdown": "location"}) + response = self._run({"trendsFilter": {"formula": "A - B"}, "breakdownFilter": {"breakdown": "location"}}) + self.assertEqual(len(response), 2) self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 450.0, 0.0]) - self.assertEqual(response[0]["label"], "London") + self.assertEqual(response[0]["breakdown_value"], "London") self.assertEqual(response[1]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 250.0, 0.0, 0.0]) - self.assertEqual(response[1]["label"], "Paris") + self.assertEqual(response[1]["label"], "Formula (A - B)") + self.assertEqual(response[1]["breakdown_value"], "Paris") @snapshot_clickhouse_queries def test_breakdown_aggregated(self): - response = self._run({"formula": "A - B", "breakdown": "location", "display": TRENDS_PIE}) + response = self._run( + {"trendsFilter": {"formula": "A - B", "display": TRENDS_PIE}, "breakdownFilter": {"breakdown": "location"}} + ) + self.assertEqual(len(response), 2) self.assertEqual(response[0]["aggregated_value"], 866.6666666666667) - self.assertEqual(response[0]["label"], "London") + self.assertEqual(response[0]["label"], "Formula (A - B)") + self.assertEqual(response[0]["breakdown_value"], "London") self.assertEqual(response[1]["aggregated_value"], 250) - self.assertEqual(response[1]["label"], "Paris") + self.assertEqual(response[1]["label"], "Formula (A - B)") + self.assertEqual(response[1]["breakdown_value"], "Paris") @snapshot_clickhouse_queries def test_breakdown_with_different_breakdown_values_per_series(self): response = self._run( { - "events": [ + "series": [ { - "id": "session start", + "event": "session start", "math": "sum", - "math_property": "session duration", + "math_property": "xyz", }, { - "id": "session end", + "event": "session end", "math": "sum", - "math_property": "session duration", + "math_property": "xyz", }, ], - "formula": "A + B", - "breakdown": "location", + "trendsFilter": {"formula": "A + B"}, + "breakdownFilter": {"breakdown": "location"}, } ) - self.assertEqual(response[0]["label"], "London") + self.assertEqual(len(response), 4) + self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 400.0, 1400.0, 0.0]) + self.assertEqual(response[0]["label"], "Formula (A + B)") + self.assertEqual(response[0]["breakdown_value"], "London") - self.assertEqual(response[1]["label"], "Paris") self.assertEqual(response[1]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 500.0, 0.0, 0.0]) + self.assertEqual(response[1]["label"], "Formula (A + B)") + self.assertEqual(response[1]["breakdown_value"], "Paris") # Regression test to ensure we actually get data for "Belo Horizonte" below # We previously had a bug where if series B,C,D, etc. had a value not present # in series A, we'd just default to an empty string - self.assertEqual(response[2]["label"], "Belo Horizonte") self.assertEqual(response[2]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 500.0, 0.0]) + self.assertEqual(response[2]["label"], "Formula (A + B)") + self.assertEqual(response[2]["breakdown_value"], "Belo Horizonte") # empty string values are considered "None" - self.assertEqual(response[3]["label"], "$$_posthog_breakdown_null_$$") self.assertEqual(response[3]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 400.0, 0.0]) + self.assertEqual(response[3]["label"], "Formula (A + B)") + self.assertEqual(response[3]["breakdown_value"], "$$_posthog_breakdown_null_$$") def test_breakdown_counts_of_different_events_one_without_events(self): with freeze_time("2020-01-04T13:01:01Z"): - response = Trends().run( - Filter( - data={ - "insight": "TRENDS", - "display": "ActionsLineGraph", - "formula": "B / A", + response = self._run( + { + "trendsFilter": {"display": "ActionsLineGraph", "formula": "B / A"}, + "breakdownFilter": { "breakdown": "location", "breakdown_type": "event", - "events": [ - { - "id": "session start", - "name": "session start", - "type": "events", - "order": 0, - }, - { - "id": "session error", - "name": "session error", - "type": "events", - "order": 1, - }, - ], - } - ), - self.team, + }, + "series": [ + { + "event": "session start", + "name": "session start", + }, + { + "event": "session error", + "name": "session error", + }, + ], + } ) self.assertEqual( response, @@ -534,8 +543,10 @@ def test_breakdown_counts_of_different_events_one_without_events(self): "2020-01-03", "2020-01-04", ], - "label": "London", - "breakdown_value": "London", + "label": "Formula (B / A)", + "breakdown_value": "Paris", + "action": None, + "filter": mock.ANY, }, { "data": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], @@ -560,35 +571,46 @@ def test_breakdown_counts_of_different_events_one_without_events(self): "2020-01-03", "2020-01-04", ], - "label": "Paris", - "breakdown_value": "Paris", + "label": "Formula (B / A)", + "breakdown_value": "London", + "action": None, + "filter": mock.ANY, }, ], ) @snapshot_clickhouse_queries def test_breakdown_cohort(self): - cohort = Cohort.objects.create( + cohort: Cohort = Cohort.objects.create( + id=999932324, team=self.team, name="cohort1", groups=[{"properties": [{"key": "$some_prop", "value": "some_val", "type": "person"}]}], ) - response = self._run({"breakdown": ["all", cohort.pk], "breakdown_type": "cohort"}) + cohort.calculate_people_ch(pending_version=0) + + response = self._run({"breakdownFilter": {"breakdown": ["all", cohort.pk], "breakdown_type": "cohort"}}) + + self.assertEqual(len(response), 2) self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 1200.0, 1350.0, 0.0]) - self.assertEqual(response[0]["label"], "all users") + self.assertEqual(response[0]["breakdown_value"], "all") + self.assertEqual(response[0]["label"], "Formula (A + B)") self.assertEqual(response[1]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 1200.0, 1350.0, 0.0]) - self.assertEqual(response[1]["label"], "cohort1") + self.assertEqual(response[1]["label"], "Formula (A + B)") + self.assertEqual(response[1]["breakdown_value"], cohort.pk) @snapshot_clickhouse_queries def test_breakdown_hogql(self): response = self._run( { - "breakdown": "concat(person.properties.$some_prop, ' : ', properties.location)", - "breakdown_type": "hogql", + "breakdownFilter": { + "breakdown": "concat(person.properties.$some_prop, ' : ', properties.location)", + "breakdown_type": "hogql", + } } ) self.assertEqual( - [series["label"] for series in response], + [series["breakdown_value"] for series in response], ["some_val : London", "some_val : Paris"], ) self.assertEqual( @@ -602,16 +624,23 @@ def test_breakdown_hogql(self): def test_breakdown_mismatching_sizes(self): response = self._run( { - "events": [{"id": "session start"}, {"id": "session end"}], - "breakdown": "location", - "formula": "A + B", + "series": [{"event": "session start"}, {"event": "session end"}], + "breakdownFilter": { + "breakdown": "location", + }, + "trendsFilter": {"formula": "A + B"}, } ) - self.assertEqual(response[0]["label"], "London") + self.assertEqual(len(response), 4, response) + self.assertEqual(response[0]["breakdown_value"], "London") self.assertEqual(response[0]["data"], [0, 0, 0, 0, 0, 1, 3, 0]) - self.assertEqual(response[1]["label"], "Paris") + self.assertEqual(response[1]["breakdown_value"], "Paris") self.assertEqual(response[1]["data"], [0, 0, 0, 0, 0, 2, 0, 0]) + self.assertEqual(response[2]["breakdown_value"], "Belo Horizonte") + self.assertEqual(response[2]["data"], [0, 0, 0, 0, 0, 0, 1, 0]) + self.assertEqual(response[3]["breakdown_value"], "$$_posthog_breakdown_null_$$") + self.assertEqual(response[3]["data"], [0, 0, 0, 0, 0, 0, 1, 0]) def test_global_properties(self): self.assertEqual( @@ -639,17 +668,17 @@ def test_event_properties(self): self.assertEqual( self._run( { - "events": [ + "series": [ { - "id": "session start", + "event": "session start", "math": "sum", - "math_property": "session duration", + "math_property": "xyz", "properties": [{"key": "$current_url", "value": "http://example.org"}], }, { - "id": "session start", + "event": "session start", "math": "avg", - "math_property": "session duration", + "math_property": "xyz", }, ] } @@ -658,41 +687,73 @@ def test_event_properties(self): ) def test_compare(self): - response = self._run({"date_from": "-1dStart", "compare": True}) + response = self._run( + { + "dateRange": { + "date_from": "-1dStart", + }, + "compareFilter": {"compare": True}, + } + ) self.assertEqual(response[0]["days"], ["2020-01-03", "2020-01-04"]) self.assertEqual(response[1]["days"], ["2020-01-01", "2020-01-02"]) self.assertEqual(response[0]["data"], [1350.0, 0.0]) self.assertEqual(response[1]["data"], [0.0, 1200.0]) def test_aggregated(self): - self.assertEqual(self._run({"display": TRENDS_PIE})[0]["aggregated_value"], 2160.0) + self.assertEqual( + self._run( + { + "trendsFilter": { + "display": TRENDS_PIE, + "formula": "A + B", + } + } + )[0]["aggregated_value"], + 2160.0, + ) def test_cumulative(self): + response = self._run({"trendsFilter": {"display": TRENDS_CUMULATIVE, "formula": "A + B"}}) + self.assertEqual(len(response), 1) self.assertEqual( - self._run({"display": TRENDS_CUMULATIVE})[0]["data"], + response[0]["data"], [0.0, 0.0, 0.0, 0.0, 0.0, 1200.0, 2550.0, 2550.0], ) + self.assertEqual( + response[0]["days"], + [ + "2019-12-28", + "2019-12-29", + "2019-12-30", + "2019-12-31", + "2020-01-01", + "2020-01-02", + "2020-01-03", + "2020-01-04", + ], + ) def test_multiple_events(self): # regression test self.assertEqual( self._run( { - "events": [ + "series": [ { - "id": "session start", + "event": "session start", "math": "sum", - "math_property": "session duration", + "math_property": "xyz", }, { - "id": "session start", + "event": "session start", "math": "avg", - "math_property": "session duration", + "math_property": "xyz", }, { - "id": "session start", + "event": "session start", "math": "avg", - "math_property": "session duration", + "math_property": "xyz", }, ] } @@ -704,9 +765,9 @@ def test_session_formulas(self): self.assertEqual( self._run( { - "events": [ - {"id": "session start", "math": "unique_session"}, - {"id": "session start", "math": "unique_session"}, + "series": [ + {"event": "session start", "math": "unique_session"}, + {"event": "session start", "math": "unique_session"}, ] } )[0]["data"], @@ -717,14 +778,14 @@ def test_group_formulas(self): self.assertEqual( self._run( { - "events": [ + "series": [ { - "id": "session start", + "event": "session start", "math": "unique_group", "math_group_type_index": 0, }, { - "id": "session start", + "event": "session start", "math": "unique_group", "math_group_type_index": 0, }, diff --git a/posthog/hogql_queries/insights/trends/test/test_trends.py b/posthog/hogql_queries/insights/trends/test/test_trends.py index 18e0e2b267fda..4543f9a5bc52e 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends.py @@ -211,8 +211,6 @@ class TestTrends(ClickhouseTestMixin, APIBaseTest): def _run(self, filter: Filter, team: Team): flush_persons_and_events() - # trend_query = filter_to_query(filter.to_dict()) - trend_query = convert_filter_to_trends_query(filter) tqr = TrendsQueryRunner(team=team, query=trend_query) return tqr.calculate().results diff --git a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py index e15175a7ffd7f..66a1fee3d27ea 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py @@ -944,13 +944,14 @@ def test_formula_with_multi_cohort_all_breakdown(self): assert len(response.results) == 2 assert response.results[0]["label"] == "Formula (A+B)" - assert response.results[0]["breakdown_value"] == "all" - assert response.results[0]["count"] == 16 + assert response.results[0]["breakdown_value"] == cohort1.pk + assert response.results[0]["count"] == 9 + assert response.results[0]["data"] == [0, 0, 2, 2, 2, 0, 1, 0, 1, 0, 1, 0] assert response.results[1]["label"] == "Formula (A+B)" - assert response.results[1]["breakdown_value"] == cohort1.pk - assert response.results[1]["count"] == 9 - assert response.results[1]["data"] == [0, 0, 2, 2, 2, 0, 1, 0, 1, 0, 1, 0] + assert response.results[1]["breakdown_value"] == "all" + assert response.results[1]["count"] == 16 + assert response.results[1]["data"] == [1, 0, 2, 4, 4, 0, 2, 1, 1, 0, 1, 0] # action needs to be unset to display custom label assert response.results[0]["action"] is None diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index 8c490a58684ed..e6675a261553e 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -829,13 +829,29 @@ def apply_formula( "days": any_result.get("days"), } ) - new_result = self.apply_formula_to_results_group(row_results, formula, is_total_value) + new_result = self.apply_formula_to_results_group( + row_results, formula, breakdown_value=breakdown_value, aggregate_values=is_total_value + ) computed_results.append(new_result) if has_compare: return multisort(computed_results, (("compare_label", False), ("count", True))) - return sorted(computed_results, key=itemgetter("count"), reverse=True) + return sorted( + computed_results, + key=lambda s: ( + 0 + if s.get("breakdown_value") not in (BREAKDOWN_NULL_STRING_LABEL, BREAKDOWN_OTHER_STRING_LABEL) + else -1 + if s["breakdown_value"] == BREAKDOWN_NULL_STRING_LABEL + else -2, + s.get("aggregated_value", sum(s.get("data") or [])), + s.get("count"), + s.get("data"), + repr(s.get("breakdown_value")), + ), + reverse=True, + ) else: return [ self.apply_formula_to_results_group([r[0] for r in results], formula, aggregate_values=is_total_value) @@ -843,7 +859,11 @@ def apply_formula( @staticmethod def apply_formula_to_results_group( - results_group: list[dict[str, Any]], formula: str, aggregate_values: Optional[bool] = False + results_group: list[dict[str, Any]], + formula: str, + *, + breakdown_value: Any = None, + aggregate_values: Optional[bool] = False, ) -> dict[str, Any]: """ Applies the formula to a list of results, resulting in a single, computed result. diff --git a/posthog/hogql_queries/utils/query_date_range.py b/posthog/hogql_queries/utils/query_date_range.py index 1f5d5bf7996a1..261828dec88ba 100644 --- a/posthog/hogql_queries/utils/query_date_range.py +++ b/posthog/hogql_queries/utils/query_date_range.py @@ -1,4 +1,3 @@ -import re from datetime import datetime, timedelta from functools import cached_property from typing import cast, Literal, Optional @@ -41,8 +40,8 @@ def __init__( self._interval = interval or IntervalType.DAY self._now_without_timezone = now - if not isinstance(self._interval, IntervalType) or re.match(r"[^a-z]", "DAY", re.IGNORECASE): - raise ValueError(f"Invalid interval: {interval}") + if not isinstance(self._interval, IntervalType): + raise ValueError(f"Value {repr(interval)} is not an instance of IntervalType") def date_to(self) -> datetime: date_to = self.now_with_timezone diff --git a/posthog/queries/trends/test/__snapshots__/test_formula.ambr b/posthog/queries/trends/test/__snapshots__/test_formula.ambr deleted file mode 100644 index 22f8fbc76cb71..0000000000000 --- a/posthog/queries/trends/test/__snapshots__/test_formula.ambr +++ /dev/null @@ -1,1028 +0,0 @@ -# serializer version: 1 -# name: TestFormula.test_aggregated_one_without_events - ''' - SELECT '' as date, - arrayMap((A, B) -> B + A, [ifNull(sub_A.total, 0)], [ifNull(sub_B.total, 0)]) - FROM - (SELECT sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) AS total - FROM events e - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') ) as sub_A - CROSS JOIN - (SELECT sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session not here'), '^"|"$', ''))) AS total - FROM events e - WHERE team_id = 2 - AND event = 'session error' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') ) as sub_B - ''' -# --- -# name: TestFormula.test_breakdown - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(properties, 'location'), '^"|"$', '') AS value, - sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as count - FROM events e - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFormula.test_breakdown.1 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(properties, 'location'), '^"|"$', '') AS value, - avg(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as count - FROM events e - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFormula.test_breakdown.2 - ''' - SELECT sub_A.date, - arrayMap((A, B) -> A - B, arrayResize(sub_A.total, max_length, 0), arrayResize(sub_B.total, max_length, 0)) , - arrayFilter(x -> notEmpty(x), [replaceRegexpAll(sub_A.breakdown_value, '^"|"$', ''), replaceRegexpAll(sub_B.breakdown_value, '^"|"$', '')])[1] , - arrayMax([length(sub_A.total), length(sub_B.total)]) as max_length - FROM - (SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(8) - UNION ALL SELECT toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['London', 'Paris'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, 'location'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['London', 'Paris']), (['London', 'Paris']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - WHERE e.team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value) as sub_A - FULL OUTER JOIN - (SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(8) - UNION ALL SELECT toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['London', 'Paris'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT avg(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, 'location'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['London', 'Paris']), (['London', 'Paris']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - WHERE e.team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value) as sub_B ON sub_A.breakdown_value = sub_B.breakdown_value - ''' -# --- -# name: TestFormula.test_breakdown_aggregated - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(properties, 'location'), '^"|"$', '') AS value, - sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as count - FROM events e - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFormula.test_breakdown_aggregated.1 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(properties, 'location'), '^"|"$', '') AS value, - avg(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as count - FROM events e - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFormula.test_breakdown_aggregated.2 - ''' - SELECT '' as date, - arrayMap((A, B) -> A - B, [ifNull(sub_A.total, 0)], [ifNull(sub_B.total, 0)]) , - arrayFilter(x -> notEmpty(x), [replaceRegexpAll(sub_A.breakdown_value, '^"|"$', ''), replaceRegexpAll(sub_B.breakdown_value, '^"|"$', '')])[1] - FROM - (SELECT sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) AS total, - transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, 'location'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['London', 'Paris']), (['London', 'Paris']), '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events e - WHERE e.team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY breakdown_value - ORDER BY breakdown_value) as sub_A - FULL OUTER JOIN - (SELECT avg(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) AS total, - transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, 'location'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['London', 'Paris']), (['London', 'Paris']), '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events e - WHERE e.team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY breakdown_value - ORDER BY breakdown_value) as sub_B ON sub_A.breakdown_value = sub_B.breakdown_value - ''' -# --- -# name: TestFormula.test_breakdown_cohort - ''' - SELECT sub_A.date, - arrayMap((A, B) -> A + B, arrayResize(sub_A.total, max_length, 0), arrayResize(sub_B.total, max_length, 0)) , - arrayFilter(x -> x != 0, [sub_A.breakdown_value, sub_B.breakdown_value])[1] , - arrayMax([length(sub_A.total), length(sub_B.total)]) as max_length - FROM - (SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(8) - UNION ALL SELECT toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT [1, 2] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - value as breakdown_value - FROM events e - INNER JOIN - (SELECT distinct_id, - 1 as value - FROM - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) - WHERE person_id IN - (SELECT id - FROM person - WHERE team_id = 2 - AND id IN - (SELECT id - FROM person - WHERE team_id = 2 - AND ((has(['some_val'], replaceRegexpAll(JSONExtractRaw(properties, '$some_prop'), '^"|"$', '')))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((has(['some_val'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$some_prop'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) - UNION ALL SELECT DISTINCT distinct_id, - 0 as value - FROM events all_events - WHERE team_id = 2 - AND toTimeZone(all_events.timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - AND toTimeZone(all_events.timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') ) ep ON e.distinct_id = ep.distinct_id - where team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value) as sub_A - FULL OUTER JOIN - (SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(8) - UNION ALL SELECT toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT [1, 2] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT avg(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - value as breakdown_value - FROM events e - INNER JOIN - (SELECT distinct_id, - 1 as value - FROM - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) - WHERE person_id IN - (SELECT id - FROM person - WHERE team_id = 2 - AND id IN - (SELECT id - FROM person - WHERE team_id = 2 - AND ((has(['some_val'], replaceRegexpAll(JSONExtractRaw(properties, '$some_prop'), '^"|"$', '')))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((has(['some_val'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$some_prop'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) - UNION ALL SELECT DISTINCT distinct_id, - 0 as value - FROM events all_events - WHERE team_id = 2 - AND toTimeZone(all_events.timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - AND toTimeZone(all_events.timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') ) ep ON e.distinct_id = ep.distinct_id - where team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value) as sub_B ON sub_A.breakdown_value = sub_B.breakdown_value - ''' -# --- -# name: TestFormula.test_breakdown_hogql - ''' - - SELECT concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')) AS value, - sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as count - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 2 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON pdi.person_id = person.id - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFormula.test_breakdown_hogql.1 - ''' - - SELECT concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')) AS value, - avg(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as count - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 2 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON pdi.person_id = person.id - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFormula.test_breakdown_hogql.2 - ''' - SELECT sub_A.date, - arrayMap((A, B) -> A + B, arrayResize(sub_A.total, max_length, 0), arrayResize(sub_B.total, max_length, 0)) , - arrayFilter(x -> notEmpty(x), [replaceRegexpAll(sub_A.breakdown_value, '^"|"$', ''), replaceRegexpAll(sub_B.breakdown_value, '^"|"$', '')])[1] , - arrayMax([length(sub_A.total), length(sub_B.total)]) as max_length - FROM - (SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(8) - UNION ALL SELECT toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['some_val : London', 'some_val : Paris'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - transform(ifNull(nullIf(concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')), ''), '$$_posthog_breakdown_null_$$'), (['some_val : London', 'some_val : Paris']), (['some_val : London', 'some_val : Paris']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) as pdi ON events.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 2 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE e.team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value) as sub_A - FULL OUTER JOIN - (SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(8) - UNION ALL SELECT toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['some_val : London', 'some_val : Paris'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT avg(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - transform(ifNull(nullIf(concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')), ''), '$$_posthog_breakdown_null_$$'), (['some_val : London', 'some_val : Paris']), (['some_val : London', 'some_val : Paris']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) as pdi ON events.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 2 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE e.team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value) as sub_B ON sub_A.breakdown_value = sub_B.breakdown_value - ''' -# --- -# name: TestFormula.test_breakdown_with_different_breakdown_values_per_series - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(properties, 'location'), '^"|"$', '') AS value, - sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as count - FROM events e - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFormula.test_breakdown_with_different_breakdown_values_per_series.1 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(properties, 'location'), '^"|"$', '') AS value, - sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as count - FROM events e - WHERE team_id = 2 - AND event = 'session end' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFormula.test_breakdown_with_different_breakdown_values_per_series.2 - ''' - SELECT sub_A.date, - arrayMap((A, B) -> A + B, arrayResize(sub_A.total, max_length, 0), arrayResize(sub_B.total, max_length, 0)) , - arrayFilter(x -> notEmpty(x), [replaceRegexpAll(sub_A.breakdown_value, '^"|"$', ''), replaceRegexpAll(sub_B.breakdown_value, '^"|"$', '')])[1] , - arrayMax([length(sub_A.total), length(sub_B.total)]) as max_length - FROM - (SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(8) - UNION ALL SELECT toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['London', 'Paris'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, 'location'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['London', 'Paris']), (['London', 'Paris']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - WHERE e.team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value) as sub_A - FULL OUTER JOIN - (SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(8) - UNION ALL SELECT toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['London', 'Belo Horizonte', '$$_posthog_breakdown_null_$$'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, 'location'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['London', 'Belo Horizonte', '$$_posthog_breakdown_null_$$']), (['London', 'Belo Horizonte', '$$_posthog_breakdown_null_$$']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - WHERE e.team_id = 2 - AND event = 'session end' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value) as sub_B ON sub_A.breakdown_value = sub_B.breakdown_value - ''' -# --- -# name: TestFormula.test_formula_with_unique_sessions - ''' - SELECT sub_A.date, - arrayMap((A, B) -> A / B, arrayResize(sub_A.total, max_length, 0), arrayResize(sub_B.total, max_length, 0)) , - arrayMax([length(sub_A.total), length(sub_B.total)]) as max_length - FROM - (SELECT groupArray(day_start) as date, - groupArray(count) AS total - FROM - (SELECT SUM(total) AS count, - day_start - FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), toDateTime('2020-01-04 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) - UNION ALL SELECT count(DISTINCT e."$session_id") AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY date) - GROUP BY day_start - ORDER BY day_start)) as sub_A - CROSS JOIN - (SELECT groupArray(day_start) as date, - groupArray(count) AS total - FROM - (SELECT SUM(total) AS count, - day_start - FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), toDateTime('2020-01-04 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) - UNION ALL SELECT count(DISTINCT pdi.person_id) AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY date) - GROUP BY day_start - ORDER BY day_start)) as sub_B - ''' -# --- -# name: TestFormula.test_hour_interval_day_level_relative - ''' - SELECT sub_A.date, - arrayMap((A, B) -> A + B, arrayResize(sub_A.total, max_length, 0), arrayResize(sub_B.total, max_length, 0)) , - arrayMax([length(sub_A.total), length(sub_B.total)]) as max_length - FROM - (SELECT groupArray(day_start) as date, - groupArray(count) AS total - FROM - (SELECT SUM(total) AS count, - day_start - FROM - (SELECT toUInt16(0) AS total, - toStartOfHour(toDateTime('2020-01-03 13:59:59', 'UTC')) - toIntervalHour(number) AS day_start - FROM numbers(dateDiff('hour', toStartOfHour(toDateTime('2020-01-02 00:00:00', 'UTC')), toDateTime('2020-01-03 13:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfHour(toDateTime('2020-01-02 00:00:00', 'UTC')) - UNION ALL SELECT sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) AS total, - toStartOfHour(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfHour(toDateTime('2020-01-02 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-03 13:59:59', 'UTC') - GROUP BY date) - GROUP BY day_start - ORDER BY day_start)) as sub_A - CROSS JOIN - (SELECT groupArray(day_start) as date, - groupArray(count) AS total - FROM - (SELECT SUM(total) AS count, - day_start - FROM - (SELECT toUInt16(0) AS total, - toStartOfHour(toDateTime('2020-01-03 13:59:59', 'UTC')) - toIntervalHour(number) AS day_start - FROM numbers(dateDiff('hour', toStartOfHour(toDateTime('2020-01-02 00:00:00', 'UTC')), toDateTime('2020-01-03 13:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfHour(toDateTime('2020-01-02 00:00:00', 'UTC')) - UNION ALL SELECT avg(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) AS total, - toStartOfHour(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfHour(toDateTime('2020-01-02 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-03 13:59:59', 'UTC') - GROUP BY date) - GROUP BY day_start - ORDER BY day_start)) as sub_B - ''' -# --- -# name: TestFormula.test_hour_interval_hour_level_relative - ''' - SELECT sub_A.date, - arrayMap((A, B) -> A + B, arrayResize(sub_A.total, max_length, 0), arrayResize(sub_B.total, max_length, 0)) , - arrayMax([length(sub_A.total), length(sub_B.total)]) as max_length - FROM - (SELECT groupArray(day_start) as date, - groupArray(count) AS total - FROM - (SELECT SUM(total) AS count, - day_start - FROM - (SELECT toUInt16(0) AS total, - toStartOfHour(toDateTime('2020-01-03 13:59:59', 'UTC')) - toIntervalHour(number) AS day_start - FROM numbers(dateDiff('hour', toStartOfHour(toDateTime('2020-01-02 13:05:01', 'UTC')), toDateTime('2020-01-03 13:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfHour(toDateTime('2020-01-02 13:05:01', 'UTC')) - UNION ALL SELECT sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) AS total, - toStartOfHour(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfHour(toDateTime('2020-01-02 13:05:01', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-03 13:59:59', 'UTC') - GROUP BY date) - GROUP BY day_start - ORDER BY day_start)) as sub_A - CROSS JOIN - (SELECT groupArray(day_start) as date, - groupArray(count) AS total - FROM - (SELECT SUM(total) AS count, - day_start - FROM - (SELECT toUInt16(0) AS total, - toStartOfHour(toDateTime('2020-01-03 13:59:59', 'UTC')) - toIntervalHour(number) AS day_start - FROM numbers(dateDiff('hour', toStartOfHour(toDateTime('2020-01-02 13:05:01', 'UTC')), toDateTime('2020-01-03 13:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfHour(toDateTime('2020-01-02 13:05:01', 'UTC')) - UNION ALL SELECT avg(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) AS total, - toStartOfHour(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfHour(toDateTime('2020-01-02 13:05:01', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-03 13:59:59', 'UTC') - GROUP BY date) - GROUP BY day_start - ORDER BY day_start)) as sub_B - ''' -# --- -# name: TestFormula.test_regression_formula_with_session_duration_aggregation - ''' - SELECT sub_A.date, - arrayMap((A, B) -> A / B, arrayResize(sub_A.total, max_length, 0), arrayResize(sub_B.total, max_length, 0)) , - arrayMax([length(sub_A.total), length(sub_B.total)]) as max_length - FROM - (SELECT groupArray(day_start) as date, - groupArray(count) AS total - FROM - (SELECT SUM(total) AS count, - day_start - FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), toDateTime('2020-01-04 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) - UNION ALL SELECT avg(session_duration) AS total, date - FROM - (SELECT toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as date, - any(sessions.session_duration) as session_duration - FROM events e - INNER JOIN - (SELECT "$session_id" AS session_id_0, - dateDiff('second', min(timestamp), max(timestamp)) as session_duration - FROM events - WHERE session_id_0 != '' - AND team_id = 2 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - INTERVAL 24 HOUR - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') + INTERVAL 24 HOUR - GROUP BY session_id_0) as sessions ON sessions."session_id_0" = e."$session_id" - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY e."$session_id", date) - GROUP BY date) - GROUP BY day_start - ORDER BY day_start)) as sub_A - CROSS JOIN - (SELECT groupArray(day_start) as date, - groupArray(count) AS total - FROM - (SELECT SUM(total) AS count, - day_start - FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), toDateTime('2020-01-04 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) - UNION ALL SELECT count(*) AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - INNER JOIN - (SELECT "$session_id" AS session_id_1, - dateDiff('second', min(timestamp), max(timestamp)) as session_duration - FROM events - WHERE session_id_1 != '' - AND team_id = 2 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - INTERVAL 24 HOUR - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') + INTERVAL 24 HOUR - GROUP BY session_id_1) as sessions ON sessions."session_id_1" = e."$session_id" - WHERE team_id = 2 - AND event = 'session end' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY date) - GROUP BY day_start - ORDER BY day_start)) as sub_B - ''' -# --- -# name: TestFormula.test_regression_formula_with_unique_sessions_2x_and_duration_filter - ''' - SELECT sub_A.date, - arrayMap((A, B) -> A / B, arrayResize(sub_A.total, max_length, 0), arrayResize(sub_B.total, max_length, 0)) , - arrayMax([length(sub_A.total), length(sub_B.total)]) as max_length - FROM - (SELECT groupArray(day_start) as date, - groupArray(count) AS total - FROM - (SELECT SUM(total) AS count, - day_start - FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), toDateTime('2020-01-04 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) - UNION ALL SELECT count(DISTINCT e."$session_id") AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - INNER JOIN - (SELECT "$session_id" AS session_id_0, - dateDiff('second', min(timestamp), max(timestamp)) as session_duration - FROM events - WHERE session_id_0 != '' - AND team_id = 2 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - INTERVAL 24 HOUR - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') + INTERVAL 24 HOUR - GROUP BY session_id_0) as sessions ON sessions."session_id_0" = e."$session_id" - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - AND (sessions.session_duration > 12.0) - GROUP BY date) - GROUP BY day_start - ORDER BY day_start)) as sub_A - CROSS JOIN - (SELECT groupArray(day_start) as date, - groupArray(count) AS total - FROM - (SELECT SUM(total) AS count, - day_start - FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), toDateTime('2020-01-04 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) - UNION ALL SELECT count(DISTINCT e."$session_id") AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - INNER JOIN - (SELECT "$session_id" AS session_id_1, - dateDiff('second', min(timestamp), max(timestamp)) as session_duration - FROM events - WHERE session_id_1 != '' - AND team_id = 2 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - INTERVAL 24 HOUR - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') + INTERVAL 24 HOUR - GROUP BY session_id_1) as sessions ON sessions."session_id_1" = e."$session_id" - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY date) - GROUP BY day_start - ORDER BY day_start)) as sub_B - ''' -# --- -# name: TestFormula.test_regression_formula_with_unique_sessions_2x_and_duration_filter_2x - ''' - SELECT sub_A.date, - arrayMap((A, B) -> B, arrayResize(sub_A.total, max_length, 0), arrayResize(sub_B.total, max_length, 0)) , - arrayMax([length(sub_A.total), length(sub_B.total)]) as max_length - FROM - (SELECT groupArray(day_start) as date, - groupArray(count) AS total - FROM - (SELECT SUM(total) AS count, - day_start - FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), toDateTime('2020-01-04 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) - UNION ALL SELECT count(DISTINCT e."$session_id") AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - INNER JOIN - (SELECT "$session_id" AS session_id_0, - dateDiff('second', min(timestamp), max(timestamp)) as session_duration - FROM events - WHERE session_id_0 != '' - AND team_id = 2 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - INTERVAL 24 HOUR - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') + INTERVAL 24 HOUR - GROUP BY session_id_0) as sessions ON sessions."session_id_0" = e."$session_id" - WHERE team_id = 2 - AND event = '$autocapture' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - AND (sessions.session_duration < 30.0) - GROUP BY date) - GROUP BY day_start - ORDER BY day_start)) as sub_A - CROSS JOIN - (SELECT groupArray(day_start) as date, - groupArray(count) AS total - FROM - (SELECT SUM(total) AS count, - day_start - FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2020-01-04 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), toDateTime('2020-01-04 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')) - UNION ALL SELECT count(DISTINCT e."$session_id") AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - INNER JOIN - (SELECT "$session_id" AS session_id_1, - dateDiff('second', min(timestamp), max(timestamp)) as session_duration - FROM events - WHERE session_id_1 != '' - AND team_id = 2 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-28 00:00:00', 'UTC') - INTERVAL 24 HOUR - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') + INTERVAL 24 HOUR - GROUP BY session_id_1) as sessions ON sessions."session_id_1" = e."$session_id" - WHERE team_id = 2 - AND event = 'session start' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - AND (sessions.session_duration > 500.0) - GROUP BY date) - GROUP BY day_start - ORDER BY day_start)) as sub_B - ''' -# ---