From 854d70e154fceb7c2c0b3c57d89247f95ac35937 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 31 Oct 2024 10:48:48 +0100 Subject: [PATCH] Improve clickhouse table creation --- README.md | 51 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 019aab32..f2e36502 100644 --- a/README.md +++ b/README.md @@ -32,34 +32,34 @@ make ```sql CREATE TABLE graphite ( - Path String, - Value Float64, - Time UInt32, - Date Date, - Timestamp UInt32 + Path String CODEC(ZSTD(3)), -- better compression + Value Float64 CODEC(Gorilla, LZ4), -- better codec for Floats + Time UInt32 CODEC(DoubleDelta, LZ4), -- delta-of-delta is almost always 0 + Date Date CODEC(DoubleDelta, LZ4), -- delta-of-delta is almost always 0 + Timestamp UInt32 CODEC(DoubleDelta, LZ4) -- delta-of-delta is almost always 0 ) ENGINE = GraphiteMergeTree('graphite_rollup') -PARTITION BY toYYYYMM(Date) +PARTITION BY toYearWeek(Date) ORDER BY (Path, Time); -- optional table for faster metric search CREATE TABLE graphite_index ( - Date Date, - Level UInt32, - Path String, - Version UInt32 + Date Date CODEC(DoubleDelta, LZ4), -- delta-of-delta is almost always 0 + Level UInt32 CODEC(DoubleDelta, LZ4), -- delta-of-delta is almost always 0 + Path String CODEC(ZSTD(3)), -- better compression + Version UInt32 TTL toDateTime(Version) + INTERVAL 2 DAY -- is necessary only for the current day ) ENGINE = ReplacingMergeTree(Version) -PARTITION BY toYYYYMM(Date) +PARTITION BY toYearWeek(Date) ORDER BY (Level, Path, Date); -- optional table for storing Graphite tags CREATE TABLE graphite_tagged ( - Date Date, - Tag1 String, - Path String, + Date Date CODEC(DoubleDelta, LZ4), -- delta-of-delta is almost always 0 + Tag1 String CODEC(ZSTD(3)), -- better compression + Path String CODEC(ZSTD(3)), -- better compression Tags Array(String), - Version UInt32 + Version UInt32 TTL toDateTime(Version) + INTERVAL 2 DAY -- is necessary only for the current day ) ENGINE = ReplacingMergeTree(Version) 
-PARTITION BY toYYYYMM(Date) +PARTITION BY toYearWeek(Date) ORDER BY (Tag1, Path, Date); ``` @@ -67,6 +67,25 @@ ORDER BY (Tag1, Path, Date); You can create Replicated tables. See [ClickHouse documentation](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/replication/) +3. One should always use [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) together with carbon-clickhouse and [graphite-clickhouse](https://github.com/go-graphite/graphite-clickhouse). Without it, the rules from the `graphite_rollup` configuration aren't applied automatically. + +### Fine-tuning the `PARTITION BY` for the graphite data table + +The `toYearWeek` function currently used in the `PARTITION BY` is a rule of thumb. When `graphite-ch-optimizer` runs, it launches `OPTIMIZE TABLE graphite PARTITION ID 'YYYYWW' FINAL` once per configured interval. When the partition is too big, it processes it a few or even several times. + +If the partition contains too much data and the optimization runs too long, consider reducing the partition size, e.g. by using `toYYYYMMDD(toStartOfInterval(Date, toIntervalDay(3)))`. + +Here's a ClickHouse query to play with `toStartOfInterval`: + +```sql +SELECT + toDate(number) AS Date, + toYearWeek(Date) AS YW, + toYYYYMMDD(toStartOfInterval(Date, toIntervalDay(3))) AS `3YMD` +FROM system.numbers +LIMIT 19900, 50 +``` + ## Configuration ``` $ carbon-clickhouse -help