From b986d9ca41360006ee99d2ca14207089187e44a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Far=C3=ADas=20Santana?=
Date: Tue, 19 Mar 2024 11:34:47 +0100
Subject: [PATCH] feat: Add minmax index for inserted_at (#20765)

* feat: Add minmax index for inserted_at

* fix: We need to coalesce with _timestamp

* fix: Import path
---
 .../0055_add_minmax_index_on_inserted_at.py |  7 +++++++
 posthog/models/event/sql.py                 | 12 ++++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 posthog/clickhouse/migrations/0055_add_minmax_index_on_inserted_at.py

diff --git a/posthog/clickhouse/migrations/0055_add_minmax_index_on_inserted_at.py b/posthog/clickhouse/migrations/0055_add_minmax_index_on_inserted_at.py
new file mode 100644
index 0000000000000..a1458a2b4a391
--- /dev/null
+++ b/posthog/clickhouse/migrations/0055_add_minmax_index_on_inserted_at.py
@@ -0,0 +1,7 @@
+from posthog.clickhouse.client.migration_tools import run_sql_with_exceptions
+from posthog.models.event.sql import EVENTS_TABLE_INSERTED_AT_INDEX_SQL, EVENTS_TABLE_MATERIALIZE_INSERTED_AT_INDEX_SQL
+
+operations = [
+    run_sql_with_exceptions(EVENTS_TABLE_INSERTED_AT_INDEX_SQL),
+    run_sql_with_exceptions(EVENTS_TABLE_MATERIALIZE_INSERTED_AT_INDEX_SQL),
+]
diff --git a/posthog/models/event/sql.py b/posthog/models/event/sql.py
index 410904ba006d4..8214ac90fdce0 100644
--- a/posthog/models/event/sql.py
+++ b/posthog/models/event/sql.py
@@ -106,6 +106,18 @@
     storage_policy=STORAGE_POLICY(),
 )
 
+EVENTS_TABLE_INSERTED_AT_INDEX_SQL = """
+ALTER TABLE {table_name} ON CLUSTER {cluster}
+ADD INDEX `minmax_inserted_at` COALESCE(`inserted_at`, `_timestamp`)
+TYPE minmax
+GRANULARITY 1
+""".format(table_name=EVENTS_DATA_TABLE(), cluster=settings.CLICKHOUSE_CLUSTER)
+
+EVENTS_TABLE_MATERIALIZE_INSERTED_AT_INDEX_SQL = """
+ALTER TABLE {table_name} ON CLUSTER {cluster}
+MATERIALIZE INDEX `minmax_inserted_at`
+""".format(table_name=EVENTS_DATA_TABLE(), cluster=settings.CLICKHOUSE_CLUSTER)
+
 # we add the settings to prevent poison pills from stopping ingestion
 # kafka_skip_broken_messages is an int, not a boolean, so we explicitly set
 # the max block size to consume from kafka such that we skip _all_ broken messages