From 49a0f683c7841b11fba91dabd989bc6f6aa73fe6 Mon Sep 17 00:00:00 2001 From: Doug Tidwell Date: Mon, 22 Jul 2024 11:57:46 -0400 Subject: [PATCH 01/17] Added (R) to title bar --- config.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.toml b/config.toml index a1e9ad9a64..a682b72fb9 100644 --- a/config.toml +++ b/config.toml @@ -54,8 +54,8 @@ anchor = "smart" [languages] [languages.en] -title = "Altinity Knowledge Base" -description = "Altinity Knowledge Base" +title = "Altinity Knowledge Base for ClickHouse®" +description = "Altinity Knowledge Base for ClickHouse®" languageName = "English" # Weight used for sorting. weight = 1 From 106e8964aadd5983664f551fc5bc699797636108 Mon Sep 17 00:00:00 2001 From: Doug Tidwell Date: Mon, 22 Jul 2024 12:02:11 -0400 Subject: [PATCH 02/17] Added (R) to title bar --- content/en/_index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/en/_index.md b/content/en/_index.md index 93785ba4aa..e379add7c6 100755 --- a/content/en/_index.md +++ b/content/en/_index.md @@ -1,6 +1,6 @@ --- -title: "Altinity Knowledge Base" -linkTitle: "Altinity Knowledge Base" +title: "Altinity Knowledge Base for ClickHouse®" +linkTitle: "Altinity Knowledge Base for ClickHouse®" description: "Up-to-date ClickHouse® knowledge base for every ClickHouse user." 
keywords: - ClickHouse Knowledge Base From 8d70b85fa3a5f599e01afea99604750eee856d04 Mon Sep 17 00:00:00 2001 From: Doug Tidwell Date: Mon, 22 Jul 2024 12:23:07 -0400 Subject: [PATCH 03/17] Added (R) to title bar --- content/en/clickhouse_training/_index.md | 4 ++-- content/en/upgrade/_index.md | 2 +- content/en/upgrade_ebook/_index.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/content/en/clickhouse_training/_index.md b/content/en/clickhouse_training/_index.md index 5e722c9a93..507635aeb6 100644 --- a/content/en/clickhouse_training/_index.md +++ b/content/en/clickhouse_training/_index.md @@ -1,6 +1,6 @@ --- -title: "ClickHouse Training" -linkTitle: "ClickHouse Admin Training" +title: "ClickHouse® Training" +linkTitle: "ClickHouse® Admin Training" manualLink: https://hubs.la/Q02mylhn0 weight: 1010 --- diff --git a/content/en/upgrade/_index.md b/content/en/upgrade/_index.md index 5fbe1edcd5..09c8f43689 100644 --- a/content/en/upgrade/_index.md +++ b/content/en/upgrade/_index.md @@ -10,7 +10,7 @@ description: > weight: 10 --- -# ClickHouse Version Upgrade Procedure +# ClickHouse® Version Upgrade Procedure ## Step-by-Step Guide: diff --git a/content/en/upgrade_ebook/_index.md b/content/en/upgrade_ebook/_index.md index 53feb07f21..75041f8c4e 100644 --- a/content/en/upgrade_ebook/_index.md +++ b/content/en/upgrade_ebook/_index.md @@ -1,6 +1,6 @@ --- title: "Upgrade eBook" -linkTitle: "ClickHouse Upgrade eBook" +linkTitle: "ClickHouse® Upgrade eBook" manualLink: https://hubs.la/Q02myvmC0 weight: 1000 --- From f35a93eaf07f9e9e42b741ab88aa37d86acf7bfc Mon Sep 17 00:00:00 2001 From: Slach Date: Tue, 23 Jul 2024 15:36:45 +0400 Subject: [PATCH 04/17] replace https://github.com/AlexAkulov/clickhouse-backup to https://github.com/Altinity/clickhouse-backup --- .../altinity-kb-converting-mergetree-to-replicated.md | 2 +- .../altinity-kb-data-migration/_index.md | 2 +- .../clickhouse-backup-diff.md | 10 +++++----- .../clickhouse-backup.md | 5 ++--- 
.../clickhouse-deployment-plan.md | 2 +- .../hardening-clickhouse-security.md | 2 +- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md index 28e892ce15..1a88d58a03 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md @@ -9,7 +9,7 @@ Options here are: 1. Use`INSERT INTO foo_replicated SELECT * FROM foo`. (suitable for small tables) 2. Create table aside and attach all partition from the existing table then drop original table (uses hard links don't require extra disk space). `ALTER TABLE foo_replicated ATTACH PARTITION ID 'bar' FROM 'foo'` You can easily auto generate those commands using a query like: `SELECT DISTINCT 'ALTER TABLE foo_replicated ATTACH PARTITION ID \'' || partition_id || '\' FROM foo;' from system.parts WHERE table = 'foo';` 3. Do it 'in place' using some file manipulation. see the procedure described here: [https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/replication/\#converting-from-mergetree-to-replicatedmergetree](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/replication/#converting-from-mergetree-to-replicatedmergetree) -4. Do a backup of MergeTree and recover as ReplicatedMergeTree. [https://github.com/AlexAkulov/clickhouse-backup/blob/master/Examples.md\#how-to-convert-mergetree-to-replicatedmegretree](https://github.com/AlexAkulov/clickhouse-backup/blob/master/Examples.md#how-to-convert-mergetree-to-replicatedmegretree) +4. Do a backup of MergeTree and recover as ReplicatedMergeTree. 
[https://github.com/Altinity/clickhouse-backup/blob/master/Examples.md\#how-to-convert-mergetree-to-replicatedmegretree](https://github.com/Altinity/clickhouse-backup/blob/master/Examples.md#how-to-convert-mergetree-to-replicatedmegretree) 5. Embedded command for that should be added in future. ## example for option 2 diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md index 9b211b320f..c9d304bf88 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md @@ -91,7 +91,7 @@ Cons: Just create the backup on server 1, upload it to server 2, and restore the backup. -See [https://github.com/AlexAkulov/clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup) +See [https://github.com/Altinity/clickhouse-backup](https://github.com/Altinity/clickhouse-backup) [https://altinity.com/blog/introduction-to-clickhouse-backups-and-clickhouse-backup](https://altinity.com/blog/introduction-to-clickhouse-backups-and-clickhouse-backup) diff --git a/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup-diff.md b/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup-diff.md index 7248dfe2fe..2a94414f59 100644 --- a/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup-diff.md +++ b/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup-diff.md @@ -6,20 +6,20 @@ description: > --- ### differential backups using clickhouse-backup -1. Download the latest clickhouse-backup for your platform https://github.com/AlexAkulov/clickhouse-backup/releases +1. 
Download the latest clickhouse-backup for your platform https://github.com/Altinity/clickhouse-backup/releases ```bash # ubuntu / debian -wget https://github.com/AlexAkulov/clickhouse-backup/releases/download/v1.0.0/clickhouse-backup_1.0.0_amd64.deb -sudo dpkg -i clickhouse-backup_1.0.0_amd64.deb +wget https://github.com/Altinity/clickhouse-backup/releases/download/v2.5.20/clickhouse-backup_2.5.20_amd64.deb +sudo dpkg -i clickhouse-backup_2.5.20_amd64.deb # centos / redhat / fedora -sudo yum install https://github.com/AlexAkulov/clickhouse-backup/releases/download/v1.0.0/clickhouse-backup-1.0.0-1.x86_64.rpm +sudo yum install https://github.com/Altinity/clickhouse-backup/releases/download/v2.5.20/clickhouse-backup-2.5.20-1.x86_64.rpm # other platforms -wget https://github.com/AlexAkulov/clickhouse-backup/releases/download/v1.0.0/clickhouse-backup.tar.gz +wget https://github.com/Altinity/clickhouse-backup/releases/download/v2.5.20/clickhouse-backup.tar.gz sudo mkdir /etc/clickhouse-backup/ sudo mv clickhouse-backup/config.yml /etc/clickhouse-backup/config.yml.example sudo mv clickhouse-backup/clickhouse-backup /usr/bin/ diff --git a/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup.md b/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup.md index 74537344b6..c2e5e2ac46 100644 --- a/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup.md +++ b/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup.md @@ -6,7 +6,7 @@ description: > --- ### Installation and configuration -Download the latest `clickhouse-backup.tar.gz` from assets from [https://github.com/AlexAkulov/clickhouse-backup/releases](https://github.com/AlexAkulov/clickhouse-backup/releases) +Download the latest `clickhouse-backup.tar.gz` from assets from [https://github.com/Altinity/clickhouse-backup/releases](https://github.com/Altinity/clickhouse-backup/releases) This tar.gz contains a single binary of `clickhouse-backup` and an example of config file. 
@@ -17,7 +17,7 @@ Backblaze has s3 compatible API but requires empty acl parameter `acl: ""`. ```bash $ mkdir clickhouse-backup $ cd clickhouse-backup -$ wget https://github.com/AlexAkulov/clickhouse-backup/releases/download/2.2.0/clickhouse-backup.tar.gz +$ wget https://github.com/Altinity/clickhouse-backup/releases/download/v2.5.20/clickhouse-backup.tar.gz $ tar zxf clickhouse-backup.tar.gz $ rm clickhouse-backup.tar.gz $ cat config.yml @@ -25,7 +25,6 @@ $ cat config.yml ```yaml general: remote_storage: s3 - max_file_size: 1099511627776 disable_progress_bar: false backups_to_keep_local: 0 backups_to_keep_remote: 0 diff --git a/content/en/altinity-kb-setup-and-maintenance/clickhouse-deployment-plan.md b/content/en/altinity-kb-setup-and-maintenance/clickhouse-deployment-plan.md index 6e456a6db8..5cb07e1299 100644 --- a/content/en/altinity-kb-setup-and-maintenance/clickhouse-deployment-plan.md +++ b/content/en/altinity-kb-setup-and-maintenance/clickhouse-deployment-plan.md @@ -56,7 +56,7 @@ description: >- - https://kb.altinity.com/altinity-kb-setup-and-maintenance/schema-migration-tools/ 6. Design backup / failover strategies: - https://clickhouse.com/docs/en/operations/backup/ - - https://github.com/AlexAkulov/clickhouse-backup + - https://github.com/Altinity/clickhouse-backup 7. Develop pipelines / queries, create test suite, CI/CD 8. Do benchmark / stress tests 9. 
Test configuration changes / server restarts / failovers / version upgrades diff --git a/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardening-clickhouse-security.md b/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardening-clickhouse-security.md index edd2e76778..5f58440bdc 100644 --- a/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardening-clickhouse-security.md +++ b/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardening-clickhouse-security.md @@ -17,4 +17,4 @@ ClickHouse is currently at the design stage of creating some universal backup so 5. For a second replica, it’s enough to copy metadata and configuration. 6. Data in clickhouse is already compressed with lz4, backup can be compressed bit better, but avoid using cpu-heavy compression algorythms like gzip, use something like zstd instead. -The tool automating that process [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup). +The tool automating that process [clickhouse-backup](https://github.com/Altinity/clickhouse-backup). From ebb6ab26a5ad6e8c170220383de3230edf3a64b8 Mon Sep 17 00:00:00 2001 From: Diego Nieto Date: Tue, 23 Jul 2024 13:44:49 +0200 Subject: [PATCH 05/17] Async inserts updated (#93) * Async inserts updated * UPDATED wait_for_async_insert * UPDATED versions --- .../async-inserts.md | 138 ++++++++++-------- 1 file changed, 75 insertions(+), 63 deletions(-) diff --git a/content/en/altinity-kb-queries-and-syntax/async-inserts.md b/content/en/altinity-kb-queries-and-syntax/async-inserts.md index 770935e1f1..589cc4365e 100644 --- a/content/en/altinity-kb-queries-and-syntax/async-inserts.md +++ b/content/en/altinity-kb-queries-and-syntax/async-inserts.md @@ -5,88 +5,100 @@ description: > Async INSERTs --- -Async INSERTs is a ClickHouse feature tha enables batching data automatically and transparently on the server-side. 
Although async inserts work, they still have issues, but have been improved in latest versions. We recommend to batch at app/ingestor level because you will have more control and you decouple this responsibility from ClickHouse. Being said that here some insights about Async inserts you should now: +Async INSERTs is a ClickHouse feature that enables batching data automatically and transparently on the server-side. We recommend to batch at app/ingestor level because you will have more control and you decouple this responsibility from ClickHouse, but there are use cases where this is not possible and Async inserts come in handy if you have hundreds or thousands of clients doing small inserts. + +You can check how they work here: [Async inserts](https://clickhouse.com/docs/en/optimize/asynchronous-inserts) + +Some insights about Async inserts you should know: + * Async inserts give acknowledgment immediately after the data got inserted into the buffer (wait_for_async_insert = 0) or by default, after the data got written to a part after flushing from buffer (wait_for_async_insert = 1). -* INSERT .. SELECT is NOT async insert. (You can use matView + Null table OR ephemeral columns instead of INPUT function, then ASYNC insert work) +* `INSERT .. SELECT` is NOT async insert. (You can use matView + Null table OR ephemeral columns instead of INPUT function so Async inserts will work) * Async inserts will do (idempotent) retries. -* Async inserts can collect data for some offline remote clusters: Yandex self-driving cars were collecting the metrics data during the ride into ClickHouse installed on the car computer to a distributed table with Async inserts enabled, which were flushed to the cluster once the car was plugged to the network. * Async inserts can do batching, so multiple inserts can be squashed as a single insert (but in that case, retries are not idempotent anymore). -* Async inserts can loose your data in case of sudden restart (no fsyncs by default).
-* Async inserted data becomes available for selects not immediately after acknowledgment. -* Async inserts generally have more `moving parts` there are some background threads monitoring new data to be sent and pushing it out. -* Async inserts require extra monitoring from different system.tables (see `system.part_log`, `system.query_log` and `system.asynchronous_inserts` for 22.8). Previously such queries didn't appear in the query log. Check: [#33239](https://github.com/ClickHouse/ClickHouse/pull/33239). * Important to use `wait_for_async_insert = 1` because with any error you will loose data without knowing it. For example your table is read only -> losing data, out of disk space -> losing data, too many parts -> losing data. +* If `wait_for_async_insert = 0`: + * Async inserts can lose your data in case of sudden restart (no fsyncs by default). + * Async inserted data becomes available for selects not immediately after acknowledgment. + * Async insert is fast sending ACK to clients unblocking them, because they have to wait until ACK is received. If your use case can handle data loss, you can use `wait_for_async_insert = 0` it will increase the throughput. +* Async inserts generally have more `moving parts` there are some background threads monitoring new data to be sent and pushing it out. +* Async inserts require extra monitoring from different system.tables (see `system.part_log`, `system.query_log`, `system.asynchronous_inserts` and `system.asynchronous_insert_log`). + +# features / improvements + +* Async insert dedup: Support block deduplication for asynchronous inserts. Before this change, async inserts did not support deduplication, because multiple small inserts coexisted in one inserted batch: + - [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075) + - [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304) +* Added system table `asynchronous_insert_log`.
It contains information about asynchronous inserts (including results of queries in fire-and-forget mode. (with wait_for_async_insert=0)) for better introspection [#42040](https://github.com/ClickHouse/ClickHouse/pull/42040) +* Support async inserts in **clickhouse-client** for queries with inlined data **(Native protocol)**: + - [#34267](https://github.com/ClickHouse/ClickHouse/pull/34267) + - [#54098](https://github.com/ClickHouse/ClickHouse/issues/54098) + - [#54381](https://github.com/ClickHouse/ClickHouse/issues/54381) +* Async insert backpressure [#47623](https://github.com/ClickHouse/ClickHouse/issues/47623) +* Limit the deduplication overhead when using `async_insert_deduplicate` [#46549](https://github.com/ClickHouse/ClickHouse/pull/46549) +* `SYSTEM FLUSH ASYNC INSERTS` [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) +* Adjustable asynchronous insert timeouts [#58486](https://github.com/ClickHouse/ClickHouse/pull/58486) -## 22.10+ bugfixes/features - -* Fixed bug which could lead to deadlock while using asynchronous inserts. See [#43233](https://github.com/ClickHouse/ClickHouse/pull/43233). -* Async insert dedup: Support block deduplication for asynchronous inserts. Before this change, async inserts did not support deduplication, because multiple small inserts coexisted in one inserted batch. See [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075) and [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304). -* Added system table `asynchronous_insert_log`. It contains information about asynchronous inserts (including results of queries in fire-and-forget mode. (with wait_for_async_insert=0)) for better introspection. See [#42040](https://github.com/ClickHouse/ClickHouse/pull/42040). -* Support async inserts in **clickhouse-client** for queries with inlined data **(Native protocol)**.
- - Check: - - [#34267](https://github.com/ClickHouse/ClickHouse/pull/34267) - - [#54098](https://github.com/ClickHouse/ClickHouse/issues/54098). - - Also new feature opened by @alexey-milovidov to use async inserts with prepared blocks like a normal insert [#54381](https://github.com/ClickHouse/ClickHouse/issues/54381) -* Async insert backpressure: - - **[#47623 Back pressure for asynchronous inserts](https://github.com/ClickHouse/ClickHouse/issues/47623)** -- In order to limit the deduplication overhead when using `async_insert_deduplicate`, clickhouse writes lots of keys to keeper, and it's easy to exceed the txn limitation. So the setting `async_insert_max_query_number` is added to limit the number of async inserts in a block. This will impact on the throughput of async inserts, so this setting should not considered when duplication is disabled: `async_insert_deduplicate = 0` - - **[#46549 enable async-insert-max-query-number only if async_insert_deduplicate](https://github.com/ClickHouse/ClickHouse/pull/46549)** -- SYSTEM FLUSH ASYNC INSERTS - - **[#49160 Allow to flush asynchronous insert queue](https://github.com/ClickHouse/ClickHouse/pull/49160)** -- Fix crash when async inserts with deduplication are used for ReplicatedMergeTree tables using a nondefault merging algorithm - - **[Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms #51676](https://github.com/ClickHouse/ClickHouse/pull/51676)** -- Async inserts not working with log_comment setting: - - **[Async inserts dont work if people is using log_comment setting with different values](https://github.com/ClickHouse/ClickHouse/issues/48430)** -- Fix misbehaviour with async inserts - - **[Correctly disable async insert with deduplication when its not needed #50663](https://github.com/ClickHouse/ClickHouse/pull/50663)** - -## To improve observability / introspection +## bugfixes + +- Fixed bug which could lead to deadlock while using asynchronous inserts 
[#43233](https://github.com/ClickHouse/ClickHouse/pull/43233). +- Fix crash when async inserts with deduplication are used for ReplicatedMergeTree tables using a nondefault merging algorithm [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) +- Async inserts not working with log_comment setting [#48430](https://github.com/ClickHouse/ClickHouse/issues/48430) +- Fix misbehaviour with async inserts with deduplication [#50663](https://github.com/ClickHouse/ClickHouse/pull/50663) +- Reject Insert if `async_insert=1` and `deduplicate_blocks_in_dependent_materialized_views=1`[#60888](https://github.com/ClickHouse/ClickHouse/pull/60888) +- Disable `async_insert_use_adaptive_busy_timeout` correctly with compatibility settings [#61468](https://github.com/ClickHouse/ClickHouse/pull/61468) + + +## observability / introspection In 22.x versions, it is not possible to relate `part_log/query_id` column with `asynchronous_insert_log/query_id` column. We need to use `query_log/query_id`: `asynchronous_insert_log` shows up the `query_id` and `flush_query_id` of each async insert. The `query_id` from `asynchronous_insert_log` shows up in the `system.query_log` as `type = 'QueryStart'` but the same `query_id` does not show up in the `query_id` column of the `system.part_log`. Because the `query_id` column in the `part_log` is the identifier of the INSERT query that created a data part, and it seems it is for sync INSERTS but not for async inserts. -So in `asynchronous_inserts` table you can check the current batch that still has not been flushed. In the `asynchronous_insert_log` you can find a log of all the async inserts executed. +So in `asynchronous_inserts` table you can check the current batch that still has not been flushed. In the `asynchronous_insert_log` you can find a log of all the flushed async inserts.
+ +This has been improved in **ClickHouse 23.7** Flush queries for async inserts (the queries that do the final push of data) are now logged in the `system.query_log` where they appear as `query_kind = 'AsyncInsertFlush'` [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) + + +## Versions -But in **ClickHouse 23.7** Flush queries for async inserts (the queries that do the final push of data) are now logged in the `system.query_log` where they appear as `query_kind = 'AsyncInsertFlush'`. -- **[Log async insert flush queries into to system.query_log and system.processes #51160](https://github.com/ClickHouse/ClickHouse/pull/51160)** +- **23.8** is a good version to start using async inserts because of the improvements and bugfixes. +- **24.3** the new adaptative timeout mechanism has been added so clickhouse will throttle the inserts based on the server load.[#58486](https://github.com/ClickHouse/ClickHouse/pull/58486) ## Metrics ```sql SELECT name FROM system.columns -WHERE (table = 'metric_log') AND (name ILIKE '%Async%') - -Query id: 3d0b7cbc-7990-4498-9c18-1c988796c487 - -┌─name────────────────────────────────────────────────┐ -│ ProfileEvent_AsyncInsertQuery │ -│ ProfileEvent_AsyncInsertBytes │ -│ ProfileEvent_AsyncInsertCacheHits │ -│ ProfileEvent_FailedAsyncInsertQuery │ -│ ProfileEvent_AsynchronousReadWaitMicroseconds │ -│ ProfileEvent_AsynchronousRemoteReadWaitMicroseconds │ -│ CurrentMetric_DiskObjectStorageAsyncThreads │ -│ CurrentMetric_DiskObjectStorageAsyncThreadsActive │ -│ CurrentMetric_AsynchronousInsertThreads │ -│ CurrentMetric_AsynchronousInsertThreadsActive │part -│ CurrentMetric_AsynchronousReadWait │ -│ CurrentMetric_PendingAsyncInsert │ -│ CurrentMetric_AsyncInsertCacheSize │ -└─────────────────────────────────────────────────────┘ +WHERE (`table` = 'metric_log') AND ((name ILIKE '%asyncinsert%') OR (name ILIKE '%asynchronousinsert%')) + +┌─name─────────────────────────────────────────────┐ +│ ProfileEvent_AsyncInsertQuery │ +│ 
ProfileEvent_AsyncInsertBytes │ +│ ProfileEvent_AsyncInsertRows │ +│ ProfileEvent_AsyncInsertCacheHits │ +│ ProfileEvent_FailedAsyncInsertQuery │ +│ ProfileEvent_DistributedAsyncInsertionFailures │ +│ CurrentMetric_AsynchronousInsertThreads │ +│ CurrentMetric_AsynchronousInsertThreadsActive │ +│ CurrentMetric_AsynchronousInsertThreadsScheduled │ +│ CurrentMetric_AsynchronousInsertQueueSize │ +│ CurrentMetric_AsynchronousInsertQueueBytes │ +│ CurrentMetric_PendingAsyncInsert │ +│ CurrentMetric_AsyncInsertCacheSize │ +└──────────────────────────────────────────────────┘ SELECT * FROM system.metrics -WHERE metric ILIKE '%async%' - -┌─metric──────────────────────────────┬─value─┬─description──────────────────────────────────────────────────────────────────────┐ -│ AsynchronousInsertThreads │ 0 │ Number of threads in the AsynchronousInsert thread pool. │ -│ AsynchronousInsertThreadsActive │ 0 │ Number of threads in the AsynchronousInsert thread pool running a task. │ -│ AsynchronousReadWait │ 0 │ Number of threads waiting for asynchronous read. │ -│ PendingAsyncInsert │ 0 │ Number of asynchronous inserts that are waiting for flush. │ -│ AsyncInsertCacheSize │ 0 │ Number of async insert hash id in cache │ -└─────────────────────────────────────┴───────┴──────────────────────────────────────────────────────────────────────────────────┘ -``` +WHERE (metric ILIKE '%asyncinsert%') OR (metric ILIKE '%asynchronousinsert%') + +┌─metric─────────────────────────────┬─value─┬─description─────────────────────────────────────────────────────────────┐ +│ AsynchronousInsertThreads │ 1 │ Number of threads in the AsynchronousInsert thread pool. │ +│ AsynchronousInsertThreadsActive │ 0 │ Number of threads in the AsynchronousInsert thread pool running a task. │ +│ AsynchronousInsertThreadsScheduled │ 0 │ Number of queued or active jobs in the AsynchronousInsert thread pool. │ +│ AsynchronousInsertQueueSize │ 1 │ Number of pending tasks in the AsynchronousInsert queue. 
│ +│ AsynchronousInsertQueueBytes │ 680 │ Number of pending bytes in the AsynchronousInsert queue. │ +│ PendingAsyncInsert │ 7 │ Number of asynchronous inserts that are waiting for flush. │ +│ AsyncInsertCacheSize │ 0 │ Number of async insert hash id in cache │ +└────────────────────────────────────┴───────┴─────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file From 30c82d023b132cfd1ae1531a63f18df7ab201e8a Mon Sep 17 00:00:00 2001 From: Slach Date: Tue, 23 Jul 2024 15:49:22 +0400 Subject: [PATCH 06/17] update dependencies, The following actions uses Node.js version which is deprecated and will be forced to run on node20: actions/checkout@v2, peaceiris/actions-hugo@v2, actions/cache@v2, actions/setup-node@v3, peaceiris/actions-gh-pages@v3 --- .github/workflows/gh-pages.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml index 33a59b0d3e..a246352abf 100644 --- a/.github/workflows/gh-pages.yml +++ b/.github/workflows/gh-pages.yml @@ -8,23 +8,23 @@ on: jobs: deploy: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Git checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: submodules: true # Fetch Hugo themes (true OR recursive) fetch-depth: 0 # Fetch all history for .GitInfo and .Lastmod ref: main - name: Setup Hugo - uses: peaceiris/actions-hugo@v2 + uses: peaceiris/actions-hugo@v3 with: hugo-version: '0.128.2' extended: true - name: Cache Hugo modules - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: /tmp/hugo_cache key: ${{ runner.os }}-hugomod-${{ hashFiles('**/go.sum') }} @@ -32,12 +32,12 @@ jobs: ${{ runner.os }}-hugomod- - name: Setup Node - uses: actions/setup-node@v3 + uses: actions/setup-node@v4 with: - node-version: '14' + node-version: '20' - name: Cache dependencies - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ~/.npm key: ${{ runner.os }}-node-${{ 
hashFiles('**/package-lock.json') }} @@ -51,7 +51,7 @@ jobs: # run: hugo --gc - name: Deploy - uses: peaceiris/actions-gh-pages@v3 + uses: peaceiris/actions-gh-pages@v4 if: github.ref == 'refs/heads/main' with: github_token: ${{ secrets.GITHUB_TOKEN }} From b04be41af7ffaebee22bb13efab848eae7debb8e Mon Sep 17 00:00:00 2001 From: Diego Nieto Date: Wed, 24 Jul 2024 09:48:29 +0200 Subject: [PATCH 07/17] Update ClickHouse_python_drivers.md Updated asyncio sections --- .../ClickHouse_python_drivers.md | 74 ++++++++++++------- 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md b/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md index 433d35880d..2775ecbfb2 100644 --- a/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md +++ b/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md @@ -23,7 +23,9 @@ The **`clickhouse-driver`** is a Python library used for interacting with ClickH 7. **Asynchronous Support**: Supports asynchronous execution of queries using `asyncio`, allowing for non-blocking query execution in asynchronous Python applications. 8. **Customization**: Provides options for customizing connection settings, query execution behavior, and other parameters to suit specific application requirements and performance considerations. 9. **Compatibility**: Works with various versions of ClickHouse, ensuring compatibility and support for different ClickHouse features and functionalities. -10. **Documentation and Community**: Offers comprehensive documentation and active community support, including examples, tutorials, and forums, to assist developers in effectively using the library and addressing any issues or questions they may have. +10. 
**Documentation and Community**: Offers comprehensive documentation and active community support, including examples, tutorials, and forums, to assist developers in effectively using the library and addressing any issues or questions they may have. +11. **Supports multiple hosts** **on connection string** https://clickhouse-driver.readthedocs.io/en/latest/features.html#multiple-hosts +12. **Connection pooling** (aiohttp) **Python ecosystem libs/modules:** @@ -50,13 +52,15 @@ The ClickHouse Connect Python driver is the ClickHouse, Inc supported-official P 8. **Limited Asynchronous Support**: Some implementations of the driver offer asynchronous support, allowing developers to execute queries asynchronously to improve concurrency and scalability in asynchronous Python applications using asynchronous I/O frameworks like `asyncio`. 9. **Configuration Options**: The driver offers various configuration options, such as connection parameters, authentication methods, and connection pooling settings, allowing developers to customize the driver's behavior to suit their specific requirements and environment. 10. **Documentation and Community**: Offers comprehensive documentation and active community support, including examples, tutorials, and forums, to assist developers in effectively using the library and addressing any issues or questions they may have. [https://clickhouse.com/docs/en/integrations/language-clients/python/intro/](https://clickhouse.com/docs/en/integrations/language-clients/python/intro/) +11. **Multiple hosts on connection string not supported** https://github.com/ClickHouse/clickhouse-connect/issues/74 +12.
**Connection pooling** (urllib3) **Python ecosystem libs/modules:** - Good Pandas/Numpy support: [https://clickhouse.com/docs/en/integrations/python#consuming-query-results-with-numpy-pandas-or-arrow](https://clickhouse.com/docs/en/integrations/python#consuming-query-results-with-numpy-pandas-or-arrow) - Decent SQLAlchemy 1.3 and 1.4 support (limited feature set) -It is the most recent driver with the latest feature set (query context and query streaming …. ) +It is the most recent driver with the latest feature set (query context and query streaming …. ), and in recent release [asyncio wrapper](https://github.com/ClickHouse/clickhouse-connect/releases/tag/v0.7.16) You can check multiple official examples here: @@ -68,17 +72,20 @@ Also some Altinity examples from repo: You can clone the repo and use the helper files like `DDL.sql` to setup some tests. -Clickhouse-connect can use a connection pooler (based on urllib3) [https://clickhouse.com/docs/en/integrations/python#customizing-the-http-connection-pool](https://clickhouse.com/docs/en/integrations/python#customizing-the-http-connection-pool) ### Most common use cases: +#### Connection pooler: + +- Clickhouse-connect can use a connection pooler (based on urllib3) https://clickhouse.com/docs/en/integrations/python#customizing-the-http-connection-pool +- Clickhouse-driver you can use **aiohttp** (https://docs.aiohttp.org/en/stable/client_advanced.html#limiting-connection-pool-size) + #### Managing ClickHouse `session_id`: - clickhouse-driver - Because it is using the Native Interface `session_id` is managed internally by clickhouse, so it is very rare (unless using asyncio) to get: `Code: 373. DB::Exception: Session is locked by a concurrent client. (SESSION_IS_LOCKED)` . 
- - clickhouse-connect: How to use clickhouse-connect in a pythonic way and avoid getting `SESSION_IS_LOCKED` exceptions: - [https://clickhouse.com/docs/en/integrations/python#managing-clickhouse-session-ids](https://clickhouse.com/docs/en/integrations/python#managing-clickhouse-session-ids) @@ -96,17 +103,39 @@ Also in clickhouse documentation some explanation how to set `session_id` with a [Best practices with flask · Issue #73 · ClickHouse/clickhouse-connect](https://github.com/ClickHouse/clickhouse-connect/issues/73#issuecomment-1325280242) -#### clickhouse-connect & clickhouse-driver with Asyncio +#### Asyncio (asynchronous wrappers) + +##### clickhouse-connect + +New release with [asyncio wrapper for clickhouse-connect](https://github.com/ClickHouse/clickhouse-connect/releases/tag/v0.7.16) + +How the wrapper works: https://clickhouse.com/docs/en/integrations/python#asyncclient-wrapper + +Wrapper and connection pooler example: + +```python +import clickhouse_connect +import asyncio +from clickhouse_connect.driver.httputil import get_pool_manager + +async def main(): + client = await clickhouse_connect.get_async_client(host='localhost', port=8123, pool_mgr=get_pool_manager()) + for i in range(100): + result = await client.query("SELECT name FROM system.databases") + print(result.result_rows) + +asyncio.run(main()) +``` -`clickhouse-connect` code is synchronous and running synchronous functions in an async application is a workaround and might not be as efficient as using a library designed for asynchronous operations from the ground up. Problem is there are few libs/modules in Python. So you can use `concurrent.futures` and `ThreadpoolExecutor` or `ProcessPoolExecutor`. Python GIL has a mutex over Threads but not to Processes so if you need performance at the cost of using processes instead of threads (not much different for medium workloads) you can use `ProcesspoolExecutor` instead. 
+`clickhouse-connect` code is synchronous by default and running synchronous functions in an async application is a workaround and might not be as efficient as using a library/wrapper designed for asynchronous operations from the ground up.. So you can use the current wrapper or you can use another approach with `asyncio` and `concurrent.futures` and `ThreadpoolExecutor` or `ProcessPoolExecutor`. Python GIL has a mutex over Threads but not to Processes so if you need performance at the cost of using processes instead of threads (not much different for medium workloads) you can use `ProcesspoolExecutor` instead. -Some info about this from the tinybird guys [https://www.tinybird.co/blog-posts/killing-the-processpoolexecutor](https://www.tinybird.co/blog-posts/killing-the-processpoolexecutor) +Some info about this from the tinybird guys https://www.tinybird.co/blog-posts/killing-the-processpoolexecutor -For clickhouse-connect +For clickhouse-connect : ```python import asyncio -from concurrent.futures import ThreadPoolExecutor +from concurrent.futures import ProcessPoolExecutor import clickhouse_connect # Function to execute a query using clickhouse-connect synchronously @@ -115,11 +144,11 @@ def execute_query_sync(query): result = client.query(query) return result -# Asynchronous wrapper function to run the synchronous function in a thread pool +# Asynchronous wrapper function to run the synchronous function in a process pool async def execute_query_async(query): loop = asyncio.get_running_loop() - # Use ThreadPoolExecutor to execute the synchronous function - with ThreadPoolExecutor() as pool: + # Use ProcessPoolExecutor to execute the synchronous function + with ProcessPoolExecutor() as pool: result = await loop.run_in_executor(pool, execute_query_sync, query) return result @@ -132,21 +161,14 @@ async def main(): if __name__ == '__main__': asyncio.run(main()) ``` +##### Clickhouse-driver -Clickhouse-driver code is also synchronous and suffers the same problem as 
clickhouse-connect - -[https://clickhouse-driver.readthedocs.io/en/latest/quickstart.html#async-and-multithreading](https://clickhouse-driver.readthedocs.io/en/latest/quickstart.html#async-and-multithreading) - -So to use an asynchronous approach it is recommended to use a connection pool and some `asyncio` wrapper that can hide the complexity of using the `ThreadPoolExecutor/ProcessPoolExecutor` - -To begin testing such environment [aiohttp](https://docs.aiohttp.org/) is a good approach. Here an example: - -[https://github.com/lesandie/clickhouse-tests/blob/main/scripts/test_aiohttp_inserts.py](https://github.com/lesandie/clickhouse-tests/blob/main/scripts/test_aiohttp_inserts.py) - -How to tune the connection pooler: [https://docs.aiohttp.org/en/stable/client_advanced.html#limiting-connection-pool-size](https://docs.aiohttp.org/en/stable/client_advanced.html#limiting-connection-pool-size)) +`clickhouse-driver` code is also synchronous and suffers the same problem as `clickhouse-connect` https://clickhouse-driver.readthedocs.io/en/latest/quickstart.html#async-and-multithreading -Also `aiochclient` is another good wrapper [https://github.com/maximdanilchenko/aiochclient](https://github.com/maximdanilchenko/aiochclient) for the HTTP interface +So to use asynchronous approach it is recommended to use a connection pool and some asyncio wrapper that can hide the complexity of using the `ThreadPoolExecutor/ProcessPoolExecutor` -For the native interface you can try [https://github.com/long2ice/asynch](https://github.com/long2ice/asynch) +- To begin testing such environment [aiohttp](https://docs.aiohttp.org/) is a good approach. 
Here an example: https://github.com/lesandie/clickhouse-tests/blob/main/scripts/test_aiohttp_inserts.py + This will use simply requests module and aiohttp (you can tune the connection pooler https://docs.aiohttp.org/en/stable/client_advanced.html#limiting-connection-pool-size) -`asynch` is an asyncio ClickHouse Python Driver with native (TCP) interface support, which reuses most of [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) and complies with [PEP249](https://www.python.org/dev/peps/pep-0249/). \ No newline at end of file +- Also `aiochclient` is another good wrapper https://github.com/maximdanilchenko/aiochclient for the HTTP interface +- For the native interface you can try https://github.com/long2ice/asynch, `asynch` is an asyncio ClickHouse Python Driver with native (TCP) interface support, which reuse most of [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) and comply with [PEP249](https://www.python.org/dev/peps/pep-0249/). From 655ccbb87f9cc3e4bc16707ebeeab1aca9ee2f96 Mon Sep 17 00:00:00 2001 From: xc0derx <11428624+xc0derx@users.noreply.github.com> Date: Thu, 25 Jul 2024 09:50:06 +0000 Subject: [PATCH 08/17] change docker hub link --- .../altinity-kb-clickhouse-in-docker.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-clickhouse-in-docker.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-clickhouse-in-docker.md index cf6bc5bbaf..92596e2ce0 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-clickhouse-in-docker.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-clickhouse-in-docker.md @@ -8,7 +8,7 @@ description: > Check -* [https://hub.docker.com/r/yandex/clickhouse-server/](https://hub.docker.com/r/yandex/clickhouse-server/) +* [https://hub.docker.com/r/clickhouse/clickhouse-server](https://hub.docker.com/r/clickhouse/clickhouse-server) * 
[https://docs.altinity.com/clickhouseonkubernetes/](https://docs.altinity.com/clickhouseonkubernetes/) * sources of entry point - [https://github.com/ClickHouse/ClickHouse/blob/master/docker/server/entrypoint.sh](https://github.com/ClickHouse/ClickHouse/blob/master/docker/server/entrypoint.sh) @@ -40,7 +40,7 @@ docker run -d \ --cap-add=IPC_LOCK \ --cap-add=SYS_PTRACE \ --network=host \ - yandex/clickhouse-server:21.1.7 + clickhouse/clickhouse-server:latest docker exec -it some-clickhouse-server clickhouse-client docker exec -it some-clickhouse-server bash From 99554455418918a40a30e01869871c0cf68dea32 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 25 Jul 2024 21:18:10 +0000 Subject: [PATCH 09/17] @xc0derx has signed the CLA in Altinity/altinityknowledgebase#95 --- signatures/version1/cla.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/signatures/version1/cla.json b/signatures/version1/cla.json index 25abef6889..7a9a86ae42 100644 --- a/signatures/version1/cla.json +++ b/signatures/version1/cla.json @@ -231,6 +231,14 @@ "created_at": "2024-07-22T02:02:49Z", "repoId": 358618261, "pullRequestNo": 89 + }, + { + "name": "xc0derx", + "id": 11428624, + "comment_id": 2251412685, + "created_at": "2024-07-25T21:17:57Z", + "repoId": 358618261, + "pullRequestNo": 95 } ] } \ No newline at end of file From 893f1b803882a1091409f7e8653b9117ba4bcaf4 Mon Sep 17 00:00:00 2001 From: Robert Hodges Date: Thu, 25 Jul 2024 20:56:48 -0700 Subject: [PATCH 10/17] Update name of ClickHouse project owner --- content/en/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/en/_index.md b/content/en/_index.md index e379add7c6..1891f8ce76 100755 --- a/content/en/_index.md +++ b/content/en/_index.md @@ -26,6 +26,6 @@ For more detailed information about Altinity services support, see the following The following sites are also useful references regarding ClickHouse: -* [ClickHouse.com 
documentation](https://clickhouse.com/docs/en/): From Yandex, the creators of ClickHouse +* [ClickHouse.com documentation](https://clickhouse.com/docs/en/): Official documentation from ClickHouse Inc. * [ClickHouse at Stackoverflow](https://stackoverflow.com/questions/tagged/clickhouse): Community driven responses to questions regarding ClickHouse * [Google groups (Usenet) yes we remember it](https://groups.google.com/g/clickhouse): The grandparent of all modern discussion boards. From 2e9d6297447ee315c1010395b48e1981d59b0a2f Mon Sep 17 00:00:00 2001 From: Robert Hodges Date: Fri, 26 Jul 2024 07:03:16 -0700 Subject: [PATCH 11/17] Update yandex tags to clickhouse --- .../altinity-cloud/altinity-cloud-access-management.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/content/en/altinity-kb-integrations/altinity-cloud/altinity-cloud-access-management.md b/content/en/altinity-kb-integrations/altinity-cloud/altinity-cloud-access-management.md index 129586e1bd..74f327386c 100644 --- a/content/en/altinity-kb-integrations/altinity-cloud/altinity-cloud-access-management.md +++ b/content/en/altinity-kb-integrations/altinity-cloud/altinity-cloud-access-management.md @@ -28,7 +28,7 @@ To add the `access_management` setting to an Altinity.Cloud ClickHouse Cluster: 1. **Contents**: Enter the following to allow the `clickhouse_operator` that controls the cluster through the `clickhouse-operator` the ability to set administrative options: ```xml - + 1 @@ -37,7 +37,7 @@ To add the `access_management` setting to an Altinity.Cloud ClickHouse Cluster: 1 - + ``` access_management=1 means that users `admin`, `clickhouse_operator` are able to create users and grant them privileges using SQL. @@ -50,7 +50,7 @@ To add the `access_management` setting to an Altinity.Cloud ClickHouse Cluster: 3. 
**Contents**: ```xml - + /etc/clickhouse-server/users.xml @@ -62,5 +62,5 @@ To add the `access_management` setting to an Altinity.Cloud ClickHouse Cluster: /var/lib/clickhouse/access/ - + ``` From d1f69f234b827809e5a0230bb4aa8d07162fae54 Mon Sep 17 00:00:00 2001 From: Robert Hodges Date: Mon, 29 Jul 2024 07:53:16 -0700 Subject: [PATCH 12/17] Update operator product name --- .../clickhouse-operator.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/en/altinity-kb-setup-and-maintenance/clickhouse-operator.md b/content/en/altinity-kb-setup-and-maintenance/clickhouse-operator.md index e84894a7bf..71a3dec9c5 100644 --- a/content/en/altinity-kb-setup-and-maintenance/clickhouse-operator.md +++ b/content/en/altinity-kb-setup-and-maintenance/clickhouse-operator.md @@ -1,11 +1,11 @@ --- -title: "ClickHouse operator" +title: "Altinity Kubernetes Operator For ClickHouse" linkTitle: "ClickHouse operator" weight: 100 description: >- - ClickHouse operator + Altinity Kubernetes Operator For ClickHouse --- -## ClickHouse operator +## Altinity Operator Documentation https://github.com/Altinity/clickhouse-operator/blob/master/docs/README.md From 3e41a19644b66d46db743db20321bd5b94b545df Mon Sep 17 00:00:00 2001 From: Doug Tidwell Date: Mon, 29 Jul 2024 19:08:14 -0400 Subject: [PATCH 13/17] Site cleanup, mostly minor changes --- config.toml | 2 +- content/en/_index.md | 8 ++++++- .../dictionaries-and-arrays.md | 2 +- .../partial-updates.md | 4 ++-- .../security-named-collections.md | 2 +- content/en/altinity-kb-integrations/Spark.md | 6 ++--- .../altinity-kb-exactly-once-semantics.md | 2 +- .../catboost-mindsdb-fast.ai.md | 2 +- .../mysql-clickhouse.md | 4 ++-- ...e-issues-with-running-clickhouse-in-k8s.md | 2 +- .../altinity-kb-queries-and-syntax/_index.md | 2 +- .../altinity-kb-final-clause-speed.md | 2 +- .../altinity-kb-kill-query.md | 2 +- .../altinity-kb-optimize-vs-optimize-final.md | 2 +- .../altinity-kb-parameterized-views.md | 4 ++-- 
...e-deadlock-avoided.-client-should-retry.md | 2 +- .../altinity-kb-sample-by.md | 4 ++-- .../ansi-sql-mode.md | 2 +- .../array-functions-as-window.md | 4 +--- .../async-inserts.md | 4 ++-- .../cumulative-unique.md | 2 +- .../data-types-on-disk-and-in-ram.md | 2 +- .../distinct-vs-group-by-vs-limit-by.md | 2 +- .../explain-query.md | 2 +- .../group-by/_index.md | 8 +++---- .../group-by/tricks.md | 2 +- .../lag-lead.md | 2 +- .../mutations.md | 2 +- .../pivot-unpivot.md | 2 +- .../row_policy_using_dictionary.md | 2 +- .../sampling-example.md | 4 ++-- ...-ifstate-for-simple-aggregate-functions.md | 2 +- .../skip-indexes/_index.md | 2 +- ...kip-index-bloom_filter-for-array-column.md | 5 ++-- .../slow_select_count.md | 4 ++-- .../state-and-merge-combinators.md | 2 +- .../time-zones.md | 8 +++---- .../top-n-and-remain.md | 2 +- .../trace_log.md | 2 +- .../troubleshooting.md | 5 +++- .../ttl/modify-ttl.md | 2 +- .../ttl/ttl-group-by-examples.md | 4 ++-- .../ttl/ttl-recompress-example.md | 2 +- .../update-via-dictionary.md | 2 +- .../variable-partitioning.md | 2 +- .../window-functions.md | 4 ++-- .../materialized-views/_index.md | 2 +- .../idempotent_inserts_mv.md | 4 ++-- .../two-columns-indexing.md | 2 +- .../altinity-kb-aggressive_merges.md | 2 +- ...y-kb-converting-mergetree-to-replicated.md | 2 +- ...y-kb-clickhouse-copier-20.3-and-earlier.md | 12 +++++----- ...altinity-kb-clickhouse-copier-20.4_21.6.md | 24 +++++++++---------- ...ity-kb-clickhouse-copier-kubernetes-job.md | 4 ++-- .../remote-table-function.md | 2 +- ...ed-hosts-0-of-them-are-currently-active.md | 2 +- .../s3_and_mutations.md | 2 +- .../s3_cache_example.md | 2 +- .../altinity-kb-server-config-files.md | 16 ++++++------- .../altinity-kb-system-tables-eat-my-disk.md | 4 ++-- .../clickhouse-keeper.md | 6 ++--- .../ch-logs-2-json-vectordev.md | 8 +++---- .../disk_encryption.md | 2 +- .../filesystems.md | 2 +- .../schema-migration-tools/_index.md | 12 +++++----- 
.../schema-migration-tools/golang-migrate.md | 2 +- .../uniqExact-to-uniq-combined.md | 2 +- .../who-ate-my-cpu.md | 2 +- ...b-number-of-active-parts-in-a-partition.md | 2 +- .../altinity-kb-useful-queries/query_log.md | 2 +- content/en/engines/_index.md | 8 +++---- .../_index.md | 8 +++---- .../how-to-convert-ordinary-to-atomic.md | 4 ++-- .../altinity-kb-nulls-in-order-by.md | 2 +- .../collapsing-vs-replacing.md | 2 +- .../merge-performance-final-optimize-by.md | 2 +- .../part-naming-and-mvcc.md | 4 ++-- .../pick-keys.md | 6 ++--- .../replacingmergetree/_index.md | 4 ++-- ...gmergetree-does-not-collapse-duplicates.md | 4 ++-- .../skip-index.md | 2 +- .../versioned-collapsing-mergetree.md | 18 +++++++------- .../en/upgrade/clickhouse-feature-report.md | 8 +++---- layouts/partials/toc.html | 2 +- 84 files changed, 170 insertions(+), 164 deletions(-) diff --git a/config.toml b/config.toml index a682b72fb9..1317d2db9e 100644 --- a/config.toml +++ b/config.toml @@ -76,7 +76,7 @@ time_format_blog = "2006.01.02" [params] # copyright = " Altinity Inc." -copyright = " Altinity Inc. Altinity®, Altinity.Cloud®, and Altinity Stable® are registered trademarks of Altinity, Inc. ClickHouse® is a registered trademark of ClickHouse, Inc.; Altinity is not affiliated with or associated with ClickHouse, Inc." +copyright = " Altinity Inc. Altinity®, Altinity.Cloud®, and Altinity Stable® are registered trademarks of Altinity, Inc. ClickHouse® is a registered trademark of ClickHouse, Inc.; Altinity is not affiliated with or associated with ClickHouse, Inc. Kafka, Kubernetes, MySQL, and PostgreSQL are trademarks and property of their respective owners." 
privacy_policy = "https://altinity.com/privacy-policy/" favicon = "/favicon.ico" diff --git a/content/en/_index.md b/content/en/_index.md index e379add7c6..edbc954e91 100755 --- a/content/en/_index.md +++ b/content/en/_index.md @@ -21,7 +21,13 @@ The [Altinity Knowledge Base is licensed under Apache 2.0](https://github.com/Al For more detailed information about Altinity services support, see the following: * [Altinity](https://altinity.com/): Providers of Altinity.Cloud, providing SOC-2 certified support for ClickHouse. -* [Altinity ClickHouse Documentation](https://docs.altinity.com): Detailed guides on installing and connecting ClickHouse software to other services. +* [Altinity.com Documentation](https://docs.altinity.com): Detailed guides on working with: + * [Altinity.Cloud](https://docs.altinity.com/altinitycloud/) + * [Altinity.Cloud Anywhere](https://docs.altinity.com/altinitycloudanywhere/) + * [The Altinity Cloud Manager](https://docs.altinity.com/altinitycloud/quickstartguide/clusterviewexplore/) + * [The Altinity Kubernetes Operator for ClickHouse](https://docs.altinity.com/releasenotes/altinity-kubernetes-operator-release-notes/) + * [The Altinity Sink Connector for ClickHouse](https://docs.altinity.com/releasenotes/altinity-sink-connector-release-notes/) and + * [Altinity Backup for ClickHouse](https://docs.altinity.com/releasenotes/altinity-backup-release-notes/) * [Altinity Blog](https://altinity.com/blog/): Blog posts about ClickHouse the database and Altinity services. 
The following sites are also useful references regarding ClickHouse: diff --git a/content/en/altinity-kb-dictionaries/dictionaries-and-arrays.md b/content/en/altinity-kb-dictionaries/dictionaries-and-arrays.md index db0352e2f2..fcbc0c5aac 100644 --- a/content/en/altinity-kb-dictionaries/dictionaries-and-arrays.md +++ b/content/en/altinity-kb-dictionaries/dictionaries-and-arrays.md @@ -4,7 +4,7 @@ linkTitle: "Dictionaries & arrays" description: > Dictionaries & arrays --- -## Dictionary with Clickhouse table as a source +## Dictionary with ClickHouse table as a source ### Test data diff --git a/content/en/altinity-kb-dictionaries/partial-updates.md b/content/en/altinity-kb-dictionaries/partial-updates.md index 088e562e84..c832b20b1f 100644 --- a/content/en/altinity-kb-dictionaries/partial-updates.md +++ b/content/en/altinity-kb-dictionaries/partial-updates.md @@ -4,7 +4,7 @@ linkTitle: "Partial updates" description: > Partial updates --- -Clickhouse is able to fetch from a source only updated rows. You need to define `update_field` section. +ClickHouse is able to fetch from a source only updated rows. You need to define `update_field` section. As an example, We have a table in an external source MySQL, PG, HTTP, ... defined with the following code sample: @@ -36,4 +36,4 @@ LIFETIME(MIN 30 MAX 30) A dictionary with **update_field** `updated_at` will fetch only updated rows. A dictionary saves the current time (now) time of the last successful update and queries the source `where updated_at >= previous_update - 1` (shift = 1 sec.). 
-In case of HTTP source Clickhouse will send get requests with **update_field** as an URL parameter `&updated_at=2020-01-01%2000:01:01` +In case of HTTP source ClickHouse will send get requests with **update_field** as an URL parameter `&updated_at=2020-01-01%2000:01:01` diff --git a/content/en/altinity-kb-dictionaries/security-named-collections.md b/content/en/altinity-kb-dictionaries/security-named-collections.md index 28ebe9a8fe..907b6d113c 100644 --- a/content/en/altinity-kb-dictionaries/security-named-collections.md +++ b/content/en/altinity-kb-dictionaries/security-named-collections.md @@ -6,7 +6,7 @@ description: > --- -## Dictionary with Clickhouse table as a source with named collections +## Dictionary with ClickHouse table as a source with named collections ### Data for connecting to external sources can be stored in named collections diff --git a/content/en/altinity-kb-integrations/Spark.md b/content/en/altinity-kb-integrations/Spark.md index ccb2ca0b3a..2f270b92ea 100644 --- a/content/en/altinity-kb-integrations/Spark.md +++ b/content/en/altinity-kb-integrations/Spark.md @@ -16,7 +16,7 @@ The trivial & natural way to talk to ClickHouse from Spark is using jdbc. There ClickHouse-Native-JDBC has some hints about integration with Spark even in the main README file. -'Official' driver does support some conversion of complex data types (Roarring bitmaps) for Spark-Clickhouse integration: https://github.com/ClickHouse/clickhouse-jdbc/pull/596 +'Official' driver does support some conversion of complex data types (Roarring bitmaps) for Spark-ClickHouse integration: https://github.com/ClickHouse/clickhouse-jdbc/pull/596 But proper partitioning of the data (to spark partitions) may be tricky with jdbc. 
@@ -58,12 +58,12 @@ Arrays, Higher-order functions, machine learning, integration with lot of differ * Using a bunch of ClickHouse and Spark in MFI Soft (Russian) https://www.youtube.com/watch?v=ID8eTnmag0s (russian) * Spark read and write ClickHouse (Chinese: Spark读写ClickHouse) https://yerias.github.io/2020/12/08/clickhouse/9/#Jdbc%E6%93%8D%E4%BD%9Cclickhouse * Spark JDBC write clickhouse operation summary (Chinese: Spark JDBC 写 clickhouse 操作总结) https://www.jianshu.com/p/43f78c8a025b?hmsr=toutiao.io&utm_campaign=toutiao.io&utm_medium=toutiao.io&utm_source=toutiao.io -* Spark-sql is based on Clickhouse's DataSourceV2 data source extension (Chinese: spark-sql基于Clickhouse的DataSourceV2数据源扩展) +* Spark-sql is based on ClickHouse's DataSourceV2 data source extension (Chinese: spark-sql基于ClickHouse的DataSourceV2数据源扩展) https://www.cnblogs.com/mengyao/p/4689866.html * Alibaba integration instructions (English) https://www.alibabacloud.com/help/doc-detail/191192.htm * Tencent integration instructions (English) https://intl.cloud.tencent.com/document/product/1026/35884 * Yandex DataProc demo: loading files from S3 to ClickHouse with Spark (Russian) https://www.youtube.com/watch?v=N3bZW0_rRzI -* Clickhouse official documentation_Spark JDBC writes some pits of ClickHouse (Chinese: clickhouse官方文档_Spark JDBC写ClickHouse的一些坑) https://blog.csdn.net/weixin_39615984/article/details/111206050 +* ClickHouse official documentation_Spark JDBC writes some pits of ClickHouse (Chinese: clickhouse官方文档_Spark JDBC写ClickHouse的一些坑) https://blog.csdn.net/weixin_39615984/article/details/111206050 * ClickHouse data import: Flink, Spark, Kafka, MySQL, Hive (Chinese: 篇五|ClickHouse数据导入 Flink、Spark、Kafka、MySQL、Hive) https://zhuanlan.zhihu.com/p/299094269 * Baifendian Big Data Technical Team: Practice of ClickHouse data synchronization solutionbased on multiple Spark tasks (Chinese: 百分点大数据技术团队:基于多 Spark 任务的 ClickHouse 数据同步方案实践) https://www.6aiq.com/article/1635461873075 * SPARK-CLICKHOUSE-ES REAL-TIME 
PROJECT EIGHTH DAY-PRECISE ONE-TIME CONSUMPTION SAVE OFFSET. (Chinese: SPARK-CLICKHOUSE-ES实时项目第八天-精确一次性消费保存偏移量) https://www.freesion.com/article/71421322524/ diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-exactly-once-semantics.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-exactly-once-semantics.md index 3432db9fd5..22211a9374 100644 --- a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-exactly-once-semantics.md +++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-exactly-once-semantics.md @@ -18,6 +18,6 @@ We need to have something like transactions on ClickHouse side to be able to avo ## block-aggregator by eBay -Block Aggregator is a data loader that subscribes to Kafka topics, aggregates the Kafka messages into blocks that follow the Clickhouse’s table schemas, and then inserts the blocks into ClickHouse. Block Aggregator provides exactly-once delivery guarantee to load data from Kafka to ClickHouse. Block Aggregator utilizes Kafka’s metadata to keep track of blocks that are intended to send to ClickHouse, and later uses this metadata information to deterministically re-produce ClickHouse blocks for re-tries in case of failures. The identical blocks are guaranteed to be deduplicated by ClickHouse. +Block Aggregator is a data loader that subscribes to Kafka topics, aggregates the Kafka messages into blocks that follow the ClickHouse’s table schemas, and then inserts the blocks into ClickHouse. Block Aggregator provides exactly-once delivery guarantee to load data from Kafka to ClickHouse. Block Aggregator utilizes Kafka’s metadata to keep track of blocks that are intended to send to ClickHouse, and later uses this metadata information to deterministically re-produce ClickHouse blocks for re-tries in case of failures. The identical blocks are guaranteed to be deduplicated by ClickHouse. 
[eBay/block-aggregator](https://github.com/eBay/block-aggregator) diff --git a/content/en/altinity-kb-integrations/catboost-mindsdb-fast.ai.md b/content/en/altinity-kb-integrations/catboost-mindsdb-fast.ai.md index acecf80f5a..ee41487d1c 100644 --- a/content/en/altinity-kb-integrations/catboost-mindsdb-fast.ai.md +++ b/content/en/altinity-kb-integrations/catboost-mindsdb-fast.ai.md @@ -11,7 +11,7 @@ Article is based on feedback provided by one of Altinity clients. CatBoost: * It uses gradient boosting - a hard to use technique which can outperform neural networks. Gradient boosting is powerful but it's easy to shoot yourself in the foot using it. -* The documentation on how to use it is quite lacking. The only good source of information on how to properly configure a model to yield good results is this video: [https://www.youtube.com/watch?v=usdEWSDisS0](https://www.youtube.com/watch?v=usdEWSDisS0) . We had to dig around GitHub issues to find out how to make it work with ClickHouse. +* The documentation on how to use it is quite lacking. The only good source of information on how to properly configure a model to yield good results is this video: [https://www.youtube.com/watch?v=usdEWSDisS0](https://www.youtube.com/watch?v=usdEWSDisS0) . We had to dig around GitHub issues to find out how to make it work with ClickHouse®. * CatBoost is fast. Other libraries will take ~5X to ~10X as long to do what CatBoost does. * CatBoost will do preprocessing out of the box (fills nulls, apply standard scaling, encodes strings as numbers). 
* CatBoost has all functions you'd need (metrics, plotters, feature importance) diff --git a/content/en/altinity-kb-integrations/mysql-clickhouse.md b/content/en/altinity-kb-integrations/mysql-clickhouse.md index bb80893313..ad521ccd00 100644 --- a/content/en/altinity-kb-integrations/mysql-clickhouse.md +++ b/content/en/altinity-kb-integrations/mysql-clickhouse.md @@ -1,9 +1,9 @@ --- title: "MySQL" -linkTitle: "Integration Clickhouse with MySQL" +linkTitle: "Integrating ClickHouse® with MySQL" weight: 100 description: >- - Integration Clickhouse with MySQL + Integrating ClickHouse® with MySQL --- ### Replication using MaterializeMySQL. diff --git a/content/en/altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s.md b/content/en/altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s.md index de1c5240ed..cdfc942c2f 100644 --- a/content/en/altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s.md +++ b/content/en/altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s.md @@ -34,7 +34,7 @@ kubectl logs chi-chcluster-2-1-0 -c clickhouse-pod -n chcluster --previous kubectl describe pod chi-chcluster-2-1-0 -n chcluster ``` -Q. Clickhouse is caching the Kafka pod's IP and trying to connect to the same ip even when there is a new Kafka pod running and the old one is deprecated. Is there some setting where we could refresh the connection +Q. ClickHouse is caching the Kafka pod's IP and trying to connect to the same ip even when there is a new Kafka pod running and the old one is deprecated. 
Is there some setting where we could refresh the connection `1` in config.xml diff --git a/content/en/altinity-kb-queries-and-syntax/_index.md b/content/en/altinity-kb-queries-and-syntax/_index.md index e771c1d459..46d710897d 100644 --- a/content/en/altinity-kb-queries-and-syntax/_index.md +++ b/content/en/altinity-kb-queries-and-syntax/_index.md @@ -5,6 +5,6 @@ keywords: - clickhouse queries - clickhouse joins description: > - Learn about ClickHouse queries & syntax, including Joins & Window Functions. + Learn about ClickHouse® queries & syntax, including Joins & Window Functions. weight: 1 --- diff --git a/content/en/altinity-kb-queries-and-syntax/altinity-kb-final-clause-speed.md b/content/en/altinity-kb-queries-and-syntax/altinity-kb-final-clause-speed.md index 77d2222267..949df68577 100644 --- a/content/en/altinity-kb-queries-and-syntax/altinity-kb-final-clause-speed.md +++ b/content/en/altinity-kb-queries-and-syntax/altinity-kb-final-clause-speed.md @@ -6,7 +6,7 @@ description: > --- `SELECT * FROM table FINAL` -* Before 20.5 - always executed in a single thread and slow. +* Before ClickHouse® 20.5 - always executed in a single thread and slow. * Since 20.5 - final can be parallel, see [https://github.com/ClickHouse/ClickHouse/pull/10463](https://github.com/ClickHouse/ClickHouse/pull/10463) * Since 20.10 - you can use `do_not_merge_across_partitions_select_final` setting. * Since 22.6 - final even more parallel, see [https://github.com/ClickHouse/ClickHouse/pull/36396](https://github.com/ClickHouse/ClickHouse/pull/36396) diff --git a/content/en/altinity-kb-queries-and-syntax/altinity-kb-kill-query.md b/content/en/altinity-kb-queries-and-syntax/altinity-kb-kill-query.md index f3feed94c2..255bc07de0 100644 --- a/content/en/altinity-kb-queries-and-syntax/altinity-kb-kill-query.md +++ b/content/en/altinity-kb-queries-and-syntax/altinity-kb-kill-query.md @@ -7,7 +7,7 @@ description: > Unfortunately not all queries can be killed. 
`KILL QUERY` only sets a flag that must be checked by the query. A query pipeline is checking this flag before a switching to next block. If the pipeline has stuck somewhere in the middle it cannot be killed. -If a query does not stop, the only way to get rid of it is to restart ClickHouse. +If a query does not stop, the only way to get rid of it is to restart ClickHouse®. See also: diff --git a/content/en/altinity-kb-queries-and-syntax/altinity-kb-optimize-vs-optimize-final.md b/content/en/altinity-kb-queries-and-syntax/altinity-kb-optimize-vs-optimize-final.md index 8d525b7bf0..a2f0a245a8 100644 --- a/content/en/altinity-kb-queries-and-syntax/altinity-kb-optimize-vs-optimize-final.md +++ b/content/en/altinity-kb-queries-and-syntax/altinity-kb-optimize-vs-optimize-final.md @@ -12,7 +12,7 @@ You have 40 parts in 3 partitions. This unscheduled merge selects some partition `OPTIMIZE TABLE xyz FINAL` -- initiates a cycle of unscheduled merges. -ClickHouse merges parts in this table until will remains 1 part in each partition (if a system has enough free disk space). As a result, you get 3 parts, 1 part per partition. In this case, CH rewrites parts even if they are already merged into a single part. It creates a huge CPU / Disk load if the table ( XYZ) is huge. ClickHouse reads / uncompress / merge / compress / writes all data in the table. +ClickHouse® merges parts in this table until will remains 1 part in each partition (if a system has enough free disk space). As a result, you get 3 parts, 1 part per partition. In this case, ClickHouse rewrites parts even if they are already merged into a single part. It creates a huge CPU / Disk load if the table (XYZ) is huge. ClickHouse reads / uncompress / merge / compress / writes all data in the table. If this table has size 1TB it could take around 3 hours to complete. 
diff --git a/content/en/altinity-kb-queries-and-syntax/altinity-kb-parameterized-views.md b/content/en/altinity-kb-queries-and-syntax/altinity-kb-parameterized-views.md index 6a4f1b2839..493bd04433 100644 --- a/content/en/altinity-kb-queries-and-syntax/altinity-kb-parameterized-views.md +++ b/content/en/altinity-kb-queries-and-syntax/altinity-kb-parameterized-views.md @@ -5,7 +5,7 @@ description: > Parameterized views --- -## ClickHouse version 23.1+ +## ClickHouse® version 23.1+ (23.1.6.42, 23.2.5.46, 23.3.1.2823) Have inbuild support for [parametrized views](https://clickhouse.com/docs/en/sql-reference/statements/create/view#parameterized-view): @@ -34,7 +34,7 @@ select * from v(xx=[1,2,3]); ``` -## ClickHouse versions per 23.1 +## ClickHouse versions pre 23.1 Custom settings allows to emulate parameterized views. diff --git a/content/en/altinity-kb-queries-and-syntax/altinity-kb-possible-deadlock-avoided.-client-should-retry.md b/content/en/altinity-kb-queries-and-syntax/altinity-kb-possible-deadlock-avoided.-client-should-retry.md index c1219e234e..61c6c0e99d 100644 --- a/content/en/altinity-kb-queries-and-syntax/altinity-kb-possible-deadlock-avoided.-client-should-retry.md +++ b/content/en/altinity-kb-queries-and-syntax/altinity-kb-possible-deadlock-avoided.-client-should-retry.md @@ -4,7 +4,7 @@ linkTitle: "Possible deadlock avoided. Client should retry" description: > Possible deadlock avoided. Client should retry --- -In version 19.14 a serious issue was found: a race condition that can lead to server deadlock. The reason for that was quite fundamental, and a temporary workaround for that was added ("possible deadlock avoided"). +In ClickHouse® version 19.14 a serious issue was found: a race condition that can lead to server deadlock. The reason for that was quite fundamental, and a temporary workaround for that was added ("possible deadlock avoided"). Those locks are one of the fundamental things that the core team was actively working on in 2020. 
diff --git a/content/en/altinity-kb-queries-and-syntax/altinity-kb-sample-by.md b/content/en/altinity-kb-queries-and-syntax/altinity-kb-sample-by.md index bef2c67819..7f7e010091 100644 --- a/content/en/altinity-kb-queries-and-syntax/altinity-kb-sample-by.md +++ b/content/en/altinity-kb-queries-and-syntax/altinity-kb-sample-by.md @@ -8,7 +8,7 @@ The execution pipeline is embedded in the partition reading code. So that works this way: -1. ClickHouse does partition pruning based on `WHERE` conditions. +1. ClickHouse® does partition pruning based on `WHERE` conditions. 2. For every partition, it picks a columns ranges (aka 'marks' / 'granulas') based on primary key conditions. 3. Here the sampling logic is applied: a) in case of `SAMPLE k` (`k` in `0..1` range) it adds conditions `WHERE sample_key < k * max_int_of_sample_key_type` b) in case of `SAMPLE k OFFSET m` it adds conditions `WHERE sample_key BETWEEN m * max_int_of_sample_key_type AND (m + k) * max_int_of_sample_key_type`c) in case of `SAMPLE N` (N>1) if first estimates how many rows are inside the range we need to read and based on that convert it to 3a case (calculate k based on number of rows in ranges and desired number of rows) 4. on the data returned by those other conditions are applied (so here the number of rows can be decreased here) @@ -56,4 +56,4 @@ SELECT count() FROM table WHERE ... AND cityHash64(some_high_card_key) % 10 = 0; SELECT count() FROM table WHERE ... AND rand() % 10 = 0; -- Non-deterministic ``` -ClickHouse will read more data from disk compared to an example with a good SAMPLE key, but it's more universal and can be used if you can't change table ORDER BY key. (To learn more about ClickHouse internals, [ClickHouse Administrator Training](https://altinity.com/clickhouse-training/) is available.) 
\ No newline at end of file +ClickHouse will read more data from disk compared to an example with a good SAMPLE key, but it's more universal and can be used if you can't change table ORDER BY key. (To learn more about ClickHouse internals, [Administrator Training for ClickHouse](https://altinity.com/clickhouse-training/) is available.) \ No newline at end of file diff --git a/content/en/altinity-kb-queries-and-syntax/ansi-sql-mode.md b/content/en/altinity-kb-queries-and-syntax/ansi-sql-mode.md index 61d3973b5d..54bf7eef9c 100644 --- a/content/en/altinity-kb-queries-and-syntax/ansi-sql-mode.md +++ b/content/en/altinity-kb-queries-and-syntax/ansi-sql-mode.md @@ -4,7 +4,7 @@ linkTitle: "ANSI SQL mode" description: > ANSI SQL mode --- -It's possible to tune some settings which would make ClickHouse more ANSI SQL compatible(and slower): +It's possible to tune some settings which would make ClickHouse® more ANSI SQL compatible(and slower): ```sql SET join_use_nulls=1; -- introduced long ago diff --git a/content/en/altinity-kb-queries-and-syntax/array-functions-as-window.md b/content/en/altinity-kb-queries-and-syntax/array-functions-as-window.md index 8a66dde750..47ac9a2d23 100644 --- a/content/en/altinity-kb-queries-and-syntax/array-functions-as-window.md +++ b/content/en/altinity-kb-queries-and-syntax/array-functions-as-window.md @@ -6,9 +6,7 @@ description: >- Using array functions to mimic window-functions alike behavior. --- -# Using array functions to mimic window functions alike behavior - -There are some usecases when you may want to mimic window functions using Arrays - as an optimization step, or to contol the memory better / use on-disk spiling, or just if you have old ClickHouse version. +There are some usecases when you may want to mimic window functions using Arrays - as an optimization step, or to contol the memory better / use on-disk spiling, or just if you have old ClickHouse® version. 
## Running difference sample diff --git a/content/en/altinity-kb-queries-and-syntax/async-inserts.md b/content/en/altinity-kb-queries-and-syntax/async-inserts.md index 770935e1f1..f9fc18ab6f 100644 --- a/content/en/altinity-kb-queries-and-syntax/async-inserts.md +++ b/content/en/altinity-kb-queries-and-syntax/async-inserts.md @@ -5,14 +5,14 @@ description: > Async INSERTs --- -Async INSERTs is a ClickHouse feature tha enables batching data automatically and transparently on the server-side. Although async inserts work, they still have issues, but have been improved in latest versions. We recommend to batch at app/ingestor level because you will have more control and you decouple this responsibility from ClickHouse. Being said that here some insights about Async inserts you should now: +Async INSERTs is a ClickHouse® feature tha enables batching data automatically and transparently on the server-side. Although async inserts work, they still have issues, but have been improved in latest versions. We recommend to batch at app/ingestor level because you will have more control and you decouple this responsibility from ClickHouse. That being said, there are some insights about Async inserts you should now: * Async inserts give acknowledgment immediately after the data got inserted into the buffer (wait_for_async_insert = 0) or by default, after the data got written to a part after flushing from buffer (wait_for_async_insert = 1). * INSERT .. SELECT is NOT async insert. (You can use matView + Null table OR ephemeral columns instead of INPUT function, then ASYNC insert work) * Async inserts will do (idempotent) retries. * Async inserts can collect data for some offline remote clusters: Yandex self-driving cars were collecting the metrics data during the ride into ClickHouse installed on the car computer to a distributed table with Async inserts enabled, which were flushed to the cluster once the car was plugged to the network. 
* Async inserts can do batching, so multiple inserts can be squashed as a single insert (but in that case, retries are not idempotent anymore). -* Async inserts can loose your data in case of sudden restart (no fsyncs by default). +* Async inserts can lose your data in case of sudden restart (no fsyncs by default). * Async inserted data becomes available for selects not immediately after acknowledgment. * Async inserts generally have more `moving parts` there are some background threads monitoring new data to be sent and pushing it out. * Async inserts require extra monitoring from different system.tables (see `system.part_log`, `system.query_log` and `system.asynchronous_inserts` for 22.8). Previously such queries didn't appear in the query log. Check: [#33239](https://github.com/ClickHouse/ClickHouse/pull/33239). diff --git a/content/en/altinity-kb-queries-and-syntax/cumulative-unique.md b/content/en/altinity-kb-queries-and-syntax/cumulative-unique.md index a689645955..be9d017a75 100644 --- a/content/en/altinity-kb-queries-and-syntax/cumulative-unique.md +++ b/content/en/altinity-kb-queries-and-syntax/cumulative-unique.md @@ -20,7 +20,7 @@ INSERT INTO events SELECT FROM numbers(15); ``` -## Using window functions (starting from Clickhouse 21.3) +## Using window functions (starting from ClickHouse® 21.3) ```sql SELECT diff --git a/content/en/altinity-kb-queries-and-syntax/data-types-on-disk-and-in-ram.md b/content/en/altinity-kb-queries-and-syntax/data-types-on-disk-and-in-ram.md index 2150d339f9..daa99a3301 100644 --- a/content/en/altinity-kb-queries-and-syntax/data-types-on-disk-and-in-ram.md +++ b/content/en/altinity-kb-queries-and-syntax/data-types-on-disk-and-in-ram.md @@ -39,4 +39,4 @@ description: > -See also [https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup41/data_processing.pdf](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup41/data_processing.pdf) (slide 17-22) +See also the presentation [Data processing 
into ClickHouse®](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup41/data_processing.pdf), especially slides 17-22. diff --git a/content/en/altinity-kb-queries-and-syntax/distinct-vs-group-by-vs-limit-by.md b/content/en/altinity-kb-queries-and-syntax/distinct-vs-group-by-vs-limit-by.md index 2ec384e2a9..7b1bae0d5d 100644 --- a/content/en/altinity-kb-queries-and-syntax/distinct-vs-group-by-vs-limit-by.md +++ b/content/en/altinity-kb-queries-and-syntax/distinct-vs-group-by-vs-limit-by.md @@ -94,7 +94,7 @@ MemoryTracker: Peak memory usage (for query): 4.05 GiB. 0 rows in set. Elapsed: 4.852 sec. Processed 100.00 million rows, 800.00 MB (20.61 million rows/s., 164.88 MB/s.) -This query faster than first, because ClickHouse doesn't need to merge states for all keys, only for first 1000 (based on LIMIT) +This query faster than first, because ClickHouse® doesn't need to merge states for all keys, only for first 1000 (based on LIMIT) SELECT number % 1000 AS key diff --git a/content/en/altinity-kb-queries-and-syntax/explain-query.md b/content/en/altinity-kb-queries-and-syntax/explain-query.md index 3932c26da4..685453e570 100644 --- a/content/en/altinity-kb-queries-and-syntax/explain-query.md +++ b/content/en/altinity-kb-queries-and-syntax/explain-query.md @@ -27,7 +27,7 @@ SELECT ... 
* `SYNTAX` - query text after AST-level optimizations * `PLAN` - query execution plan * `PIPELINE` - query execution pipeline -* `ESTIMATE` - https://github.com/ClickHouse/ClickHouse/pull/26131 (since 21.9) +* `ESTIMATE` - See [Estimates for select query](https://github.com/ClickHouse/ClickHouse/pull/26131), available since ClickHouse® 21.9 * `indexes=1` supported starting from 21.6 (https://github.com/ClickHouse/ClickHouse/pull/22352 ) * `json=1` supported starting from 21.6 (https://github.com/ClickHouse/ClickHouse/pull/23082) diff --git a/content/en/altinity-kb-queries-and-syntax/group-by/_index.md b/content/en/altinity-kb-queries-and-syntax/group-by/_index.md index e7866bc3a9..fad38b457d 100644 --- a/content/en/altinity-kb-queries-and-syntax/group-by/_index.md +++ b/content/en/altinity-kb-queries-and-syntax/group-by/_index.md @@ -6,7 +6,7 @@ keywords: - clickhouse group by - clickhouse memory description: > - Learn about GROUP BY clause in ClickHouse. + Learn about GROUP BY clause in ClickHouse®. weight: 1 --- @@ -14,7 +14,7 @@ weight: 1 [Code](https://github.com/ClickHouse/ClickHouse/blob/8ab5270ded39c8b044f60f73c1de00c8117ab8f2/src/Interpreters/Aggregator.cpp#L382) -ClickHouse uses non-blocking? hash tables, so each thread has at least one hash table. +ClickHouse® uses non-blocking? hash tables, so each thread has at least one hash table. It makes easier to not care about sync between multiple threads, but has such disadvantages as: 1. Bigger memory usage. @@ -52,7 +52,7 @@ https://clickhouse.com/docs/en/sql-reference/statements/select/group-by/#select- ## optimize_aggregation_in_order GROUP BY -Usually it works slower than regular GROUP BY, because ClickHouse need's to read and process data in specific ORDER, which makes it much more complicated to parallelize reading and aggregating. 
+Usually it works slower than regular GROUP BY, because ClickHouse needs to read and process data in specific ORDER, which makes it much more complicated to parallelize reading and aggregating. But it use much less memory, because ClickHouse can stream resultset and there is no need to keep it in memory. @@ -143,7 +143,7 @@ Size of keys participated in GROUP BY 2. States of aggregation functions: -Be careful with function, which state can use unrestricted amount of memory and grow indefenetely: +Be careful with function, which state can use unrestricted amount of memory and grow indefinitely: - groupArray (groupArray(1000)()) - uniqExact (uniq,uniqCombined) diff --git a/content/en/altinity-kb-queries-and-syntax/group-by/tricks.md b/content/en/altinity-kb-queries-and-syntax/group-by/tricks.md index 0f107087f5..4595d6a279 100644 --- a/content/en/altinity-kb-queries-and-syntax/group-by/tricks.md +++ b/content/en/altinity-kb-queries-and-syntax/group-by/tricks.md @@ -193,7 +193,7 @@ Elapsed: 6.247 sec. Processed 1.00 billion rows, 27.00 GB (160.09 million rows/s ``` -It can be especially useful when you tries to do GROUP BY lc_column_1, lc_column_2 and ClickHouse falls back to serialized algorithm. +It can be especially useful when you tries to do GROUP BY lc_column_1, lc_column_2 and ClickHouse® falls back to serialized algorithm. 
### Two LowCardinality Columns in GROUP BY diff --git a/content/en/altinity-kb-queries-and-syntax/lag-lead.md b/content/en/altinity-kb-queries-and-syntax/lag-lead.md index 8db5c6f649..9ca10840ea 100644 --- a/content/en/altinity-kb-queries-and-syntax/lag-lead.md +++ b/content/en/altinity-kb-queries-and-syntax/lag-lead.md @@ -59,7 +59,7 @@ order by g, a; └───┴────────────┴────────────┴────────────┘ ``` -## Using window functions (starting from Clickhouse 21.3) +## Using window functions (starting from ClickHouse® 21.3) ```sql SET allow_experimental_window_functions = 1; diff --git a/content/en/altinity-kb-queries-and-syntax/mutations.md b/content/en/altinity-kb-queries-and-syntax/mutations.md index b8cdf2f7a9..448e698933 100644 --- a/content/en/altinity-kb-queries-and-syntax/mutations.md +++ b/content/en/altinity-kb-queries-and-syntax/mutations.md @@ -15,7 +15,7 @@ SELECT hostname(), * FROM clusterAllReplicas('your_cluster_name', system.mutatio Look on `is_done` and `latest_fail_reason` columns -## Are mutations being run in parallel or they are sequential in ClickHouse (in scope of one table) +## Are mutations being run in parallel or they are sequential in ClickHouse® (in scope of one table) ![Mutations](/assets/mutations4.png) diff --git a/content/en/altinity-kb-queries-and-syntax/pivot-unpivot.md b/content/en/altinity-kb-queries-and-syntax/pivot-unpivot.md index 566edb50fa..1dec00e54e 100644 --- a/content/en/altinity-kb-queries-and-syntax/pivot-unpivot.md +++ b/content/en/altinity-kb-queries-and-syntax/pivot-unpivot.md @@ -12,7 +12,7 @@ CREATE TABLE sales(suppkey UInt8, category String, quantity UInt32) ENGINE=Memor INSERT INTO sales VALUES (2, 'AA' ,7500),(1, 'AB' , 4000),(1, 'AA' , 6900),(1, 'AB', 8900), (1, 'AC', 8300), (1, 'AA', 7000), (1, 'AC', 9000), (2,'AA', 9800), (2,'AB', 9600), (1,'AC', 8900),(1, 'AD', 400), (2,'AD', 900), (2,'AD', 1200), (1,'AD', 2600), (2, 'AC', 9600),(1, 'AC', 6200); ``` -### Using Map data type (starting from Clickhouse 21.1) +### 
Using Map data type (starting from ClickHouse® 21.1) ```sql WITH CAST(sumMap([category], [quantity]), 'Map(String, UInt32)') AS map diff --git a/content/en/altinity-kb-queries-and-syntax/row_policy_using_dictionary.md b/content/en/altinity-kb-queries-and-syntax/row_policy_using_dictionary.md index c62fb3226a..379875491e 100644 --- a/content/en/altinity-kb-queries-and-syntax/row_policy_using_dictionary.md +++ b/content/en/altinity-kb-queries-and-syntax/row_policy_using_dictionary.md @@ -3,7 +3,7 @@ title: "Row policies overhead (hiding 'removed' tenants)" linkTitle: "Row policies overhead" weight: 100 description: > - one more approach how to hide (delete) rows in Clickhouse. + one more approach how to hide (delete) rows in ClickHouse®. --- ## No row policy diff --git a/content/en/altinity-kb-queries-and-syntax/sampling-example.md b/content/en/altinity-kb-queries-and-syntax/sampling-example.md index 6548f39fdc..252351f226 100644 --- a/content/en/altinity-kb-queries-and-syntax/sampling-example.md +++ b/content/en/altinity-kb-queries-and-syntax/sampling-example.md @@ -2,9 +2,9 @@ title: "Sampling Example" linkTitle: "Sampling Example" description: > - ClickHouse table sampling example + ClickHouse® table sampling example --- -The most important idea about sampling that the primary index must have **LowCardinality**. (For more information, see [the Altinity Knowledge Base article on LowCardinality](../../altinity-kb-schema-design/lowcardinality) or [a ClickHouse user\'s lessons learned from LowCardinality](https://altinity.com/blog/2020-5-20-reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer)). +The most important idea about sampling that the primary index must have **LowCardinality**. 
(For more information, see [the Altinity Knowledge Base article on LowCardinality](../../altinity-kb-schema-design/lowcardinality) or [a ClickHouse® user\'s lessons learned from LowCardinality](https://altinity.com/blog/2020-5-20-reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer)). The following example demonstrates how sampling can be setup correctly, and an example if it being set up incorrectly as a comparison. diff --git a/content/en/altinity-kb-queries-and-syntax/simplestateif-or-ifstate-for-simple-aggregate-functions.md b/content/en/altinity-kb-queries-and-syntax/simplestateif-or-ifstate-for-simple-aggregate-functions.md index a966920fc0..aadcf31275 100644 --- a/content/en/altinity-kb-queries-and-syntax/simplestateif-or-ifstate-for-simple-aggregate-functions.md +++ b/content/en/altinity-kb-queries-and-syntax/simplestateif-or-ifstate-for-simple-aggregate-functions.md @@ -6,7 +6,7 @@ description: > --- ### Q. What is SimpleAggregateFunction? Are there advantages to use it instead of AggregateFunction in AggregatingMergeTree? -SimpleAggregateFunction can be used for those aggregations when the function state is exactly the same as the resulting function value. Typical example is `max` function: it only requires storing the single value which is already maximum, and no extra steps needed to get the final value. In contrast `avg` need to store two numbers - sum & count, which should be divided to get the final value of aggregation (done by the `-Merge` step at the very end). +The ClickHouse® SimpleAggregateFunction can be used for those aggregations when the function state is exactly the same as the resulting function value. Typical example is `max` function: it only requires storing the single value which is already maximum, and no extra steps needed to get the final value. 
In contrast `avg` need to store two numbers - sum & count, which should be divided to get the final value of aggregation (done by the `-Merge` step at the very end). diff --git a/content/en/altinity-kb-queries-and-syntax/skip-indexes/_index.md b/content/en/altinity-kb-queries-and-syntax/skip-indexes/_index.md index d44599af2f..962c1355d8 100644 --- a/content/en/altinity-kb-queries-and-syntax/skip-indexes/_index.md +++ b/content/en/altinity-kb-queries-and-syntax/skip-indexes/_index.md @@ -4,4 +4,4 @@ linkTitle: "Skip indexes" description: > Skip indexes --- -ClickHouse provides a type of index that in specific circumstances can significantly improve query speed. These structures are labeled "skip" indexes because they enable ClickHouse to skip reading significant chunks of data that are guaranteed to have no matching values. \ No newline at end of file +ClickHouse® provides a type of index that in specific circumstances can significantly improve query speed. These structures are labeled "skip" indexes because they enable ClickHouse to skip reading significant chunks of data that are guaranteed to have no matching values. 
\ No newline at end of file diff --git a/content/en/altinity-kb-queries-and-syntax/skip-indexes/skip-index-bloom_filter-for-array-column.md b/content/en/altinity-kb-queries-and-syntax/skip-indexes/skip-index-bloom_filter-for-array-column.md index f43d52a28d..f38fadb147 100644 --- a/content/en/altinity-kb-queries-and-syntax/skip-indexes/skip-index-bloom_filter-for-array-column.md +++ b/content/en/altinity-kb-queries-and-syntax/skip-indexes/skip-index-bloom_filter-for-array-column.md @@ -6,7 +6,7 @@ description: > aliases: /altinity-kb-queries-and-syntax/skip-indexes/example-skip-index-bloom_filter-and-array-column --- -tested with 20.8.17.25 +tested with ClickHouse® 20.8.17.25 [https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree/\#table_engine-mergetree-data_skipping-indexes](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree/#table_engine-mergetree-data_skipping-indexes) @@ -40,7 +40,7 @@ select count() from bftest where has(x, -42); Processed 110.00 million rows, 9.68 GB (217.69 million rows/s., 19.16 GB/s.) ``` -As you can see Clickhouse read **110.00 million rows** and the query elapsed **Elapsed: 0.505 sec**. +As you can see ClickHouse read **110.00 million rows** and the query elapsed **Elapsed: 0.505 sec**. ### Let's add an index @@ -157,4 +157,3 @@ Also no improvement :( Outcome: I would use TYPE bloom_filter GRANULARITY 3. - 2021 Altinity Inc. All rights reserved. diff --git a/content/en/altinity-kb-queries-and-syntax/slow_select_count.md b/content/en/altinity-kb-queries-and-syntax/slow_select_count.md index 3cf5b9757b..6a2c80934d 100644 --- a/content/en/altinity-kb-queries-and-syntax/slow_select_count.md +++ b/content/en/altinity-kb-queries-and-syntax/slow_select_count.md @@ -1,11 +1,11 @@ --- -title: "Why is simple `SELECT count()` Slow in ClickHouse?" +title: "Why is simple `SELECT count()` Slow in ClickHouse®?" 
linkTitle: "Slow `SELECT count()`" weight: 100 description: >- --- -## Why is simple `SELECT count()` Slow in ClickHouse? +## Why is simple `SELECT count()` Slow in ClickHouse®? ClickHouse is a columnar database that provides excellent performance for analytical queries. However, in some cases, a simple count query can be slow. In this article, we'll explore the reasons why this can happen and how to optimize the query. diff --git a/content/en/altinity-kb-queries-and-syntax/state-and-merge-combinators.md b/content/en/altinity-kb-queries-and-syntax/state-and-merge-combinators.md index 44fe42bb5c..7334ffbed3 100644 --- a/content/en/altinity-kb-queries-and-syntax/state-and-merge-combinators.md +++ b/content/en/altinity-kb-queries-and-syntax/state-and-merge-combinators.md @@ -4,7 +4,7 @@ linkTitle: "-State & -Merge combinators" description: > -State & -Merge combinators --- --State combinator doesn't actually store information about -If combinator, so aggregate functions with -If and without have the same serialized data. +The ClickHouse® -State combinator doesn't actually store information about -If combinator, so aggregate functions with -If and without have the same serialized data. ```sql $ clickhouse-local --query "SELECT maxIfState(number,number % 2) as x, maxState(number) as y FROM numbers(10) FORMAT RowBinary" | clickhouse-local --input-format RowBinary --structure="x AggregateFunction(max,UInt64), y AggregateFunction(max,UInt64)" --query "SELECT maxMerge(x), maxMerge(y) FROM table" diff --git a/content/en/altinity-kb-queries-and-syntax/time-zones.md b/content/en/altinity-kb-queries-and-syntax/time-zones.md index e69d2ba9cb..911347e7a0 100644 --- a/content/en/altinity-kb-queries-and-syntax/time-zones.md +++ b/content/en/altinity-kb-queries-and-syntax/time-zones.md @@ -6,7 +6,7 @@ description: > --- Important things to know: -1. DateTime inside clickhouse is actually UNIX timestamp always, i.e. number of seconds since 1970-01-01 00:00:00 GMT. +1. 
DateTime inside ClickHouse® is actually UNIX timestamp always, i.e. number of seconds since 1970-01-01 00:00:00 GMT. 2. Conversion from that UNIX timestamp to a human-readable form and reverse can happen on the client (for native clients) and on the server (for HTTP clients, and for some type of queries, like `toString(ts)`) 3. Depending on the place where that conversion happened rules of different timezones may be applied. 4. You can check server timezone using `SELECT timezone()` @@ -34,7 +34,7 @@ toUnixTimestamp(toDateTime(now())): 1626432628 toUnixTimestamp(toDateTime(now(), 'UTC')): 1626432628 ``` -Since version 20.4 clickhouse uses embedded tzdata (see [https://github.com/ClickHouse/ClickHouse/pull/10425](https://github.com/ClickHouse/ClickHouse/pull/10425) ) +Since version 20.4 ClickHouse uses embedded tzdata (see [https://github.com/ClickHouse/ClickHouse/pull/10425](https://github.com/ClickHouse/ClickHouse/pull/10425) ) You get used tzdata version @@ -79,7 +79,7 @@ Query id: 855453d7-eccd-44cb-9631-f63bb02a273c ``` -Clickhouse uses system timezone info from tzdata package if it exists, and uses own builtin tzdata if it is missing in the system. +ClickHouse uses system timezone info from tzdata package if it exists, and uses own builtin tzdata if it is missing in the system. 
``` cd /usr/share/zoneinfo/Canada @@ -119,4 +119,4 @@ SELECT * FROM t_with_dt_utc └─────────────────────────┘ ``` -Best practice here: use UTC timezone everywhere, OR use the same default timezone for clickhouse server as used by your data +Best practice here: use UTC timezone everywhere, OR use the same default timezone for ClickHouse server as used by your data diff --git a/content/en/altinity-kb-queries-and-syntax/top-n-and-remain.md b/content/en/altinity-kb-queries-and-syntax/top-n-and-remain.md index 928d787c30..8ca517f88d 100644 --- a/content/en/altinity-kb-queries-and-syntax/top-n-and-remain.md +++ b/content/en/altinity-kb-queries-and-syntax/top-n-and-remain.md @@ -98,7 +98,7 @@ ORDER BY res ASC └──────┴──────────┘ ``` -## Using window functions (starting from 21.1) +## Using window functions (starting from ClickHouse® 21.1) ```sql SET allow_experimental_window_functions = 1; diff --git a/content/en/altinity-kb-queries-and-syntax/trace_log.md b/content/en/altinity-kb-queries-and-syntax/trace_log.md index 64f445e082..dd651fecd3 100644 --- a/content/en/altinity-kb-queries-and-syntax/trace_log.md +++ b/content/en/altinity-kb-queries-and-syntax/trace_log.md @@ -8,7 +8,7 @@ description: >- ## Collecting query execution flamegraph using system.trace_log -ClickHouse has embedded functionality to analyze the details of query performance. +ClickHouse® has embedded functionality to analyze the details of query performance. It's `system.trace_log` table. 
diff --git a/content/en/altinity-kb-queries-and-syntax/troubleshooting.md b/content/en/altinity-kb-queries-and-syntax/troubleshooting.md index 5acdd7e673..85be6c694f 100644 --- a/content/en/altinity-kb-queries-and-syntax/troubleshooting.md +++ b/content/en/altinity-kb-queries-and-syntax/troubleshooting.md @@ -4,6 +4,9 @@ linkTitle: "Troubleshooting" description: > Troubleshooting --- + +Tips for ClickHouse® troubleshooting + ## Log of query execution Controlled by session level setting `send_logs_level` @@ -113,7 +116,7 @@ WITH SELECT concat('clickhouse-server@', version()) AS exporter, 'https://www.speedscope.app/file-format-schema.json' AS `$schema`, - concat('Clickhouse query id: ', query) AS name, + concat('ClickHouse query id: ', query) AS name, CAST(samples, 'Array(Tuple(type String, name String, unit String, startValue UInt64, endValue UInt64, samples Array(Array(UInt32)), weights Array(UInt32)))') AS profiles, CAST(tuple(arrayMap(x -> (demangle(addressToSymbol(x)), addressToLine(x)), uniq_frames)), 'Tuple(frames Array(Tuple(name String, line String)))') AS shared FROM diff --git a/content/en/altinity-kb-queries-and-syntax/ttl/modify-ttl.md b/content/en/altinity-kb-queries-and-syntax/ttl/modify-ttl.md index 9060f15fcb..349099143f 100644 --- a/content/en/altinity-kb-queries-and-syntax/ttl/modify-ttl.md +++ b/content/en/altinity-kb-queries-and-syntax/ttl/modify-ttl.md @@ -21,7 +21,7 @@ It's usually cheap and fast command. And any new INSERT after schema change will 2. `ALTER TABLE tbl MATERIALIZE TTL` Recalculate TTL for already exist parts. -It can be heavy operation, because ClickHouse will read column data & recalculate TTL & apply TTL expression. +It can be heavy operation, because ClickHouse® will read column data & recalculate TTL & apply TTL expression. You can disable this step completely by using `materialize_ttl_after_modify` user session setting (by default it's 1, so materialization is enabled). 
diff --git a/content/en/altinity-kb-queries-and-syntax/ttl/ttl-group-by-examples.md b/content/en/altinity-kb-queries-and-syntax/ttl/ttl-group-by-examples.md index 1f21e82091..790c5d72a3 100644 --- a/content/en/altinity-kb-queries-and-syntax/ttl/ttl-group-by-examples.md +++ b/content/en/altinity-kb-queries-and-syntax/ttl/ttl-group-by-examples.md @@ -28,7 +28,7 @@ TTL ts + interval 30 day ts = min(toStartOfDay(ts)); ``` -During TTL merges Clickhouse re-calculates values of columns in the SET section. +During TTL merges ClickHouse® re-calculates values of columns in the SET section. GROUP BY section should be a prefix of a table's PRIMARY KEY (the same as ORDER BY, if no separate PRIMARY KEY defined). @@ -180,7 +180,7 @@ GROUP BY m; └────────┴─────────┴────────────┴────────────────┴────────────────┘ ``` -During merges Clickhouse re-calculates **ts** columns as **min(toStartOfDay(ts))**. It's possible **only for the last column** of `SummingMergeTree` `ORDER BY` section `ORDER BY (key1, key2, toStartOfDay(ts), ts)` otherwise it will **break** the order of rows in the table. +During merges ClickHouse re-calculates **ts** columns as **min(toStartOfDay(ts))**. It's possible **only for the last column** of `SummingMergeTree` `ORDER BY` section `ORDER BY (key1, key2, toStartOfDay(ts), ts)` otherwise it will **break** the order of rows in the table. 
### Example with AggregatingMergeTree table diff --git a/content/en/altinity-kb-queries-and-syntax/ttl/ttl-recompress-example.md b/content/en/altinity-kb-queries-and-syntax/ttl/ttl-recompress-example.md index d16fb93f23..51288f5a51 100644 --- a/content/en/altinity-kb-queries-and-syntax/ttl/ttl-recompress-example.md +++ b/content/en/altinity-kb-queries-and-syntax/ttl/ttl-recompress-example.md @@ -24,7 +24,7 @@ TTL event_time + toIntervalMonth(1) RECOMPRESS CODEC(ZSTD(1)), event_time + toIntervalMonth(6) RECOMPRESS CODEC(ZSTD(6); ``` -Default comression is LZ4 [https://clickhouse.tech/docs/en/operations/server-configuration-parameters/settings/\#server-settings-compression](https://clickhouse.tech/docs/en/operations/server-configuration-parameters/settings/#server-settings-compression) +Default comression is LZ4. See [the ClickHouse® documentation](https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings#server-settings-compression) for more information. These TTL rules recompress data after 1 and 6 months. diff --git a/content/en/altinity-kb-queries-and-syntax/update-via-dictionary.md b/content/en/altinity-kb-queries-and-syntax/update-via-dictionary.md index 0a6a1c8343..6b4f93f79b 100644 --- a/content/en/altinity-kb-queries-and-syntax/update-via-dictionary.md +++ b/content/en/altinity-kb-queries-and-syntax/update-via-dictionary.md @@ -105,7 +105,7 @@ In case of Replicated installation, Dictionary should be created on all nodes an {{% /alert %}} {{% alert title="Info" color="info" %}} -[Starting](https://github.com/ClickHouse/ClickHouse/pull/10186) from 20.4, ClickHouse forbid by default any potential non-deterministic mutations. +[Starting](https://github.com/ClickHouse/ClickHouse/pull/10186) from 20.4, ClickHouse® forbid by default any potential non-deterministic mutations. This behavior controlled by setting `allow_nondeterministic_mutations`. You can append it to query like this `ALTER TABLE xxx UPDATE ... WHERE ... 
SETTINGS allow_nondeterministic_mutations = 1;` For `ON CLUSTER` queries, you would need to put this setting in default profile and restart ClickHouse servers. {{% /alert %}} diff --git a/content/en/altinity-kb-queries-and-syntax/variable-partitioning.md b/content/en/altinity-kb-queries-and-syntax/variable-partitioning.md index 0adfb97b9b..47eff17be2 100644 --- a/content/en/altinity-kb-queries-and-syntax/variable-partitioning.md +++ b/content/en/altinity-kb-queries-and-syntax/variable-partitioning.md @@ -6,7 +6,7 @@ description: >- Approach, which allow you to redefine partitioning without table creation. --- -In that example, partitioning is being calculated via `MATERIALIZED` column expression `toDate(toStartOfInterval(ts, toIntervalT(...)))`, but partition id also can be generated on application side and inserted to ClickHouse as is. +In that example, partitioning is being calculated via `MATERIALIZED` column expression `toDate(toStartOfInterval(ts, toIntervalT(...)))`, but partition id also can be generated on application side and inserted to ClickHouse® as is. ```sql CREATE TABLE tbl diff --git a/content/en/altinity-kb-queries-and-syntax/window-functions.md b/content/en/altinity-kb-queries-and-syntax/window-functions.md index 6b23f8df91..c4e26b62a1 100644 --- a/content/en/altinity-kb-queries-and-syntax/window-functions.md +++ b/content/en/altinity-kb-queries-and-syntax/window-functions.md @@ -7,11 +7,11 @@ description: > #### Resources: -* [Tutorial: ClickHouse Window Functions](https://altinity.com/blog/clickhouse-window-functions-current-state-of-the-art) +* [Tutorial: ClickHouse® Window Functions](https://altinity.com/blog/clickhouse-window-functions-current-state-of-the-art) * [Video: Fun with ClickHouse Window Functions](https://www.youtube.com/watch?v=sm_vUdMQz4s) * [Blog: Battle of the Views: ClickHouse Window View vs. 
Live View](https://altinity.com/blog/battle-of-the-views-clickhouse-window-view-vs-live-view) -#### How Do I Simulate Window Functions Using Arrays on older versions of clickhouse? +#### How Do I Simulate Window Functions Using Arrays on older versions of ClickHouse? 1. Group with groupArray. 2. Calculate the needed metrics. diff --git a/content/en/altinity-kb-schema-design/materialized-views/_index.md b/content/en/altinity-kb-schema-design/materialized-views/_index.md index f4b684dc24..a92f6dd86b 100644 --- a/content/en/altinity-kb-schema-design/materialized-views/_index.md +++ b/content/en/altinity-kb-schema-design/materialized-views/_index.md @@ -11,7 +11,7 @@ MATERIALIZED VIEWs in ClickHouse behave like AFTER INSERT TRIGGER to the left-mo # MATERIALIZED VIEWS -* Clickhouse and the magic of materialized views. Basics explained with examples: [webinar recording](https://altinity.com/webinarspage/2019/6/26/clickhouse-and-the-magic-of-materialized-views) +* ClickHouse and the magic of materialized views. Basics explained with examples: [webinar recording](https://altinity.com/webinarspage/2019/6/26/clickhouse-and-the-magic-of-materialized-views) * Everything you should know about materialized views. 
Very detailed information about internals: [video](https://youtu.be/ckChUkC3Pns?t=9353), [annotated presentation](https://den-crane.github.io/Everything_you_should_know_about_materialized_views_commented.pdf), [presentation](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup47/materialized_views.pdf) ## Best practices diff --git a/content/en/altinity-kb-schema-design/materialized-views/idempotent_inserts_mv.md b/content/en/altinity-kb-schema-design/materialized-views/idempotent_inserts_mv.md index 19ebd72039..901e3fb0d6 100644 --- a/content/en/altinity-kb-schema-design/materialized-views/idempotent_inserts_mv.md +++ b/content/en/altinity-kb-schema-design/materialized-views/idempotent_inserts_mv.md @@ -10,7 +10,7 @@ description: >- ClickHouse still does not have transactions. They will be implemented around 2022Q2. -Because of Clickhouse materialized view is a trigger. And an insert into a table and an insert into a subordinate materialized view it's two different inserts so they are not atomic alltogether. +Because of ClickHouse materialized view is a trigger. And an insert into a table and an insert into a subordinate materialized view it's two different inserts so they are not atomic alltogether. And insert into a materialized view may fail after the succesful insert into the table. In case of any failure a client gets the error about failed insertion. You may enable insert_deduplication (it's enabled by default for Replciated engines) and repeate the insert with an idea to achive idempotate insertion, @@ -148,7 +148,7 @@ select sum(CNT) from test_mv; └──────────┘ ``` -Idea how to fix it in Clickhouse source code https://github.com/ClickHouse/ClickHouse/issues/30240 +Idea how to fix it in ClickHouse source code https://github.com/ClickHouse/ClickHouse/issues/30240 ### Fake (unused) metric to add uniqueness. 
diff --git a/content/en/altinity-kb-schema-design/two-columns-indexing.md b/content/en/altinity-kb-schema-design/two-columns-indexing.md index f699e01557..34145a0f4d 100644 --- a/content/en/altinity-kb-schema-design/two-columns-indexing.md +++ b/content/en/altinity-kb-schema-design/two-columns-indexing.md @@ -22,7 +22,7 @@ select * from X where B = '0123456789' and ts between ...; and both A & B are high-cardinality values -Clickhouse primary skip index (ORDER BY/PRIMARY KEY) work great when you always include leading ORDER BY columns in WHERE filter. There is an exceptions for low-cardinality columns and high-correlated values, but here is another case. A & B both high cardinality and seems that their correlation is at medium level. +ClickHouse primary skip index (ORDER BY/PRIMARY KEY) work great when you always include leading ORDER BY columns in WHERE filter. There is an exceptions for low-cardinality columns and high-correlated values, but here is another case. A & B both high cardinality and seems that their correlation is at medium level. Various solutions exist, and their effectiveness largely depends on the correlation of different column data. It is necessary to test all solutions on actual data to select the best one. diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-aggressive_merges.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-aggressive_merges.md index 5128a6c688..c1daf437d8 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-aggressive_merges.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-aggressive_merges.md @@ -7,7 +7,7 @@ description: > -Q: Is there any way I can dedicate more resources to the merging process when running Clickhouse on pretty beefy machines (like 36 cores, 1TB of RAM, and large NVMe disks)? 
+Q: Is there any way I can dedicate more resources to the merging process when running ClickHouse on pretty beefy machines (like 36 cores, 1TB of RAM, and large NVMe disks)? Mostly such things doing by changing the level of parallelism: diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md index 28e892ce15..b0591084aa 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md @@ -14,7 +14,7 @@ Options here are: ## example for option 2 -Note: ATTACH PARTITION ID 'bar' FROM 'foo'` is practically free from compute and disk space perspective. This feature utilizes filesystem hard-links and the fact that files are immutable in Clickhouse ( it's the core of the Clickhouse design, filesystem hard-links and such file manipulations are widely used ). +Note: ATTACH PARTITION ID 'bar' FROM 'foo'` is practically free from compute and disk space perspective. This feature utilizes filesystem hard-links and the fact that files are immutable in ClickHouse ( it's the core of the ClickHouse design, filesystem hard-links and such file manipulations are widely used ). 
```sql create table foo( A Int64, D Date, S String ) diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.3-and-earlier.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.3-and-earlier.md index ece660b90a..d2d46103e4 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.3-and-earlier.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.3-and-earlier.md @@ -4,10 +4,10 @@ linkTitle: "clickhouse-copier 20.3 and earlier" description: > clickhouse-copier 20.3 and earlier --- -Clickhouse-copier was created to move data between clusters. +`clickhouse-copier` was created to move data between clusters. It runs simple INSERT…SELECT queries and can copy data between tables with different engine parameters and between clusters with different number of shards. In the task configuration file you need to describe the layout of the source and the target cluster, and list the tables that you need to copy. You can copy whole tables or specific partitions. -Clickhouse-copier uses temporary distributed tables to select from the source cluster and insert into the target cluster. +`clickhouse-copier` uses temporary distributed tables to select from the source cluster and insert into the target cluster. ## The process is as follows @@ -27,17 +27,17 @@ If a worker was interrupted, another worker can be started to continue the task. ## Configuring the engine of the target table -Clickhouse-copier uses the engine from the task configuration file for these purposes: +`clickhouse-copier` uses the engine from the task configuration file for these purposes: * to create target tables if they don’t exist. 
* PARTITION BY: to SELECT a partition of data from the source table, to DROP existing partitions from target tables. -Clickhouse-copier does not support the old MergeTree format. -However, you can create the target tables manually and specify the engine in the task configuration file in the new format so that clickhouse-copier can parse it for its SELECT queries. +`clickhouse-copier` does not support the old MergeTree format. +However, you can create the target tables manually and specify the engine in the task configuration file in the new format so that `clickhouse-copier` can parse it for its SELECT queries. ## How to monitor the status of running tasks -Clickhouse-copier uses ZooKeeper to keep track of the progress and to communicate between workers. +`clickhouse-copier` uses ZooKeeper to keep track of the progress and to communicate between workers. Here is a list of queries that you can use to see what’s happening. ```sql diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.4_21.6.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.4_21.6.md index ed95099d84..c3bf83b6a5 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.4_21.6.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.4_21.6.md @@ -6,16 +6,16 @@ description: > aliases: /altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-20.4+/ --- -Clickhouse-copier was created to move data between clusters. +`clickhouse-copier` was created to move data between clusters. 
It runs simple `INSERT…SELECT` queries and can copy data between tables with different engine parameters and between clusters with different number of shards. In the task configuration file you need to describe the layout of the source and the target cluster, and list the tables that you need to copy. You can copy whole tables or specific partitions. -Clickhouse-copier uses temporary distributed tables to select from the source cluster and insert into the target cluster. +`clickhouse-copier` uses temporary distributed tables to select from the source cluster and insert into the target cluster. -The behavior of clickhouse-copier was changed in 20.4: +The behavior of `clickhouse-copier` was changed in 20.4: -* Now clickhouse-copier inserts data into intermediate tables, and after the insert finishes successfully clickhouse-copier attaches the completed partition into the target table. This allows for incremental data copying, because the data in the target table is intact during the process. **Important note:** ATTACH PARTITION respects the `max_partition_size_to_drop` limit. Make sure the `max_partition_size_to_drop` limit is big enough (or set to zero) in the destination cluster. If clickhouse-copier is unable to attach a partition because of the limit, it will proceed to the next partition, and it will drop the intermediate table when the task is finished (if the intermediate table is less than the `max_table_size_to_drop` limit). **Another important note:** ATTACH PARTITION is replicated. The attached partition will need to be downloaded by the other replicas. This can create significant network traffic between ClickHouse nodes. If an attach takes a long time, clickhouse-copier will log a timeout and will proceed to the next step. -* Now clickhouse-copier splits the source data into chunks and copies them one by one. This is useful for big source tables, when inserting one partition of data can take hours. 
If there is an error during the insert clickhouse-copier has to drop the whole partition and start again. The `number_of_splits` parameter lets you split your data into chunks so that in case of an exception clickhouse-copier has to re-insert only one chunk of the data. -* Now clickhouse-copier runs `OPTIMIZE target_table PARTITION ... DEDUPLICATE` for non-Replicated MergeTree tables. **Important note:** This is a very strange feature that can do more harm than good. We recommend to disable it by configuring the engine of the target table as Replicated in the task configuration file, and create the target tables manually if they are not supposed to be replicated. Intermediate tables are always created as plain MergeTree. +* Now `clickhouse-copier` inserts data into intermediate tables, and after the insert finishes successfully `clickhouse-copier` attaches the completed partition into the target table. This allows for incremental data copying, because the data in the target table is intact during the process. **Important note:** ATTACH PARTITION respects the `max_partition_size_to_drop` limit. Make sure the `max_partition_size_to_drop` limit is big enough (or set to zero) in the destination cluster. If `clickhouse-copier` is unable to attach a partition because of the limit, it will proceed to the next partition, and it will drop the intermediate table when the task is finished (if the intermediate table is less than the `max_table_size_to_drop` limit). **Another important note:** ATTACH PARTITION is replicated. The attached partition will need to be downloaded by the other replicas. This can create significant network traffic between ClickHouse nodes. If an attach takes a long time, `clickhouse-copier` will log a timeout and will proceed to the next step. +* Now `clickhouse-copier` splits the source data into chunks and copies them one by one. This is useful for big source tables, when inserting one partition of data can take hours. 
If there is an error during the insert `clickhouse-copier` has to drop the whole partition and start again. The `number_of_splits` parameter lets you split your data into chunks so that in case of an exception `clickhouse-copier` has to re-insert only one chunk of the data. +* Now `clickhouse-copier` runs `OPTIMIZE target_table PARTITION ... DEDUPLICATE` for non-Replicated MergeTree tables. **Important note:** This is a very strange feature that can do more harm than good. We recommend to disable it by configuring the engine of the target table as Replicated in the task configuration file, and create the target tables manually if they are not supposed to be replicated. Intermediate tables are always created as plain MergeTree. ## The process is as follows @@ -39,23 +39,23 @@ If a worker was interrupted, another worker can be started to continue the task. ## Configuring the engine of the target table -Clickhouse-copier uses the engine from the task configuration file for these purposes: +`clickhouse-copier` uses the engine from the task configuration file for these purposes: * to create target and intermediate tables if they don’t exist. * PARTITION BY: to SELECT a partition of data from the source table, to ATTACH partitions into target tables, to DROP incomplete partitions from intermediate tables, to OPTIMIZE partitions after they are attached to the target. * ORDER BY: to SELECT a chunk of data from the source table. -Here is an example of SELECT that clickhouse-copier runs to get the sixth of ten chunks of data: +Here is an example of SELECT that `clickhouse-copier` runs to get the sixth of ten chunks of data: ```sql WHERE ( = ( AS partition_key)) AND (cityHash64() % 10 = 6 ) ``` -Clickhouse-copier does not support the old MergeTree format. 
-However, you can create the intermediate tables manually with the same engine as the target tables (otherwise ATTACH will not work), and specify the engine in the task configuration file in the new format so that clickhouse-copier can parse it for SELECT, ATTACH PARTITION and DROP PARTITION queries. +`clickhouse-copier` does not support the old MergeTree format. +However, you can create the intermediate tables manually with the same engine as the target tables (otherwise ATTACH will not work), and specify the engine in the task configuration file in the new format so that `clickhouse-copier` can parse it for SELECT, ATTACH PARTITION and DROP PARTITION queries. -**Important note**: always configure engine as Replicated to disable OPTIMIZE … DEDUPLICATE (unless you know why you need clickhouse-copier to run OPTIMIZE … DEDUPLICATE). +**Important note**: always configure engine as Replicated to disable OPTIMIZE … DEDUPLICATE (unless you know why you need `clickhouse-copier` to run OPTIMIZE … DEDUPLICATE). ## How to configure the number of chunks @@ -72,7 +72,7 @@ You can change this parameter in the `table` section of the task configuration f ## How to monitor the status of running tasks -Clickhouse-copier uses ZooKeeper to keep track of the progress and to communicate between workers. +`clickhouse-copier` uses ZooKeeper to keep track of the progress and to communicate between workers. Here is a list of queries that you can use to see what’s happening. 
```sql diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-kubernetes-job.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-kubernetes-job.md index c14c39574f..d5925bf5e0 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-kubernetes-job.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-kubernetes-job.md @@ -6,7 +6,7 @@ description: > --- # ClickHouse-copier deployment in kubernetes -Clickhouse-copier can be deployed in a kubernetes environment to automate some simple backups or copy fresh data between clusters. +`clickhouse-copier` can be deployed in a kubernetes environment to automate some simple backups or copy fresh data between clusters. Some documentation to read: * https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/ @@ -166,7 +166,7 @@ and deploy: kubectl -n clickhouse-copier create -f ./kubernetes/copier-configmap.yaml ``` -The ```task01.xml``` file has many parameters to take into account explained in the [clickhouse-copier documentation](https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/). Important to note that it is needed a FQDN for the zookeeper nodes and clickhouse server that are valid for the cluster. As the deployment creates a new namespace, it is recommended to use a FQDN linked to a service. For example ```zookeeper01.svc.cluster.local```. This file should be adapted to both clusters topologies and to the needs of the user. 
+The ```task01.xml``` file has many parameters to take into account explained in the [clickhouse-copier documentation](https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/). Important to note that it is needed a FQDN for the Zookeeper nodes and ClickHouse server that are valid for the cluster. As the deployment creates a new namespace, it is recommended to use a FQDN linked to a service. For example ```zookeeper01.svc.cluster.local```. This file should be adapted to both clusters topologies and to the needs of the user. The ```zookeeper.xml``` file is pretty straightforward with a simple 3 node ensemble configuration. diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/remote-table-function.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/remote-table-function.md index df00bff6dd..3f9446be46 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/remote-table-function.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/remote-table-function.md @@ -33,7 +33,7 @@ While of course it should be checked, every case is different. Yes, by the cost of extra memory usage (on the receiver side). -Clickhouse tries to form blocks of data in memory and while one of limit: `min_insert_block_size_rows` or `min_insert_block_size_bytes` being hit, clickhouse dump this block on disk. If clickhouse tries to execute insert in parallel (`max_insert_threads > 1`), it would form multiple blocks at one time. +ClickHouse tries to form blocks of data in memory and while one of limit: `min_insert_block_size_rows` or `min_insert_block_size_bytes` being hit, ClickHouse dump this block on disk. If ClickHouse tries to execute insert in parallel (`max_insert_threads > 1`), it would form multiple blocks at one time. 
So maximum memory usage can be calculated like this: `max_insert_threads * first(min_insert_block_size_rows OR min_insert_block_size_bytes)` Default values: diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/there-are-n-unfinished-hosts-0-of-them-are-currently-active.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/there-are-n-unfinished-hosts-0-of-them-are-currently-active.md index 5d00a68d98..6b2a655f16 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/there-are-n-unfinished-hosts-0-of-them-are-currently-active.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/there-are-n-unfinished-hosts-0-of-them-are-currently-active.md @@ -8,7 +8,7 @@ Sometimes your Distributed DDL queries are being stuck, and not executing on all ## Possible reasons -### Clickhouse node can't recognize itself +### ClickHouse node can't recognize itself ```sql SELECT * FROM system.clusters; -- check is_local column, it should have 1 for itself diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_and_mutations.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_and_mutations.md index 227a2c7d48..1c5c32500e 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_and_mutations.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_and_mutations.md @@ -3,7 +3,7 @@ title: "How much data are written to S3 during mutations" linkTitle: "s3 and mutations" weight: 100 description: >- - Example of how much data Clickhouse reads and writes to s3 during mutations. + Example of how much data ClickHouse reads and writes to s3 during mutations. 
--- ## Configuration diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_cache_example.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_cache_example.md index 4fd2316e7e..cf834ac01d 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_cache_example.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_cache_example.md @@ -249,7 +249,7 @@ alter table mydata move partition id '202301' to volume 's3cached'; └───────────┴───────────┴───────────┴────────────┴────────────┘ ``` -## S3 and Clickhouse start time +## S3 and ClickHouse start time Let's create a table with 1000 parts and move them to s3. ```sql diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-server-config-files.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-server-config-files.md index d694b522c6..edc07bbb5b 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-server-config-files.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-server-config-files.md @@ -2,16 +2,16 @@ title: "Server config files" linkTitle: "Server config files" description: > - How to manage server config files in Clickhouse + How to manage server config files in ClickHouse --- ## Сonfig management (recommended structure) -Clickhouse server config consists of two parts server settings (config.xml) and users settings (users.xml). +ClickHouse server config consists of two parts server settings (config.xml) and users settings (users.xml). By default they are stored in the folder **/etc/clickhouse-server/** in two files config.xml & users.xml. -We suggest never change vendor config files and place your changes into separate .xml files in sub-folders. This way is easier to maintain and ease Clickhouse upgrades. +We suggest never change vendor config files and place your changes into separate .xml files in sub-folders. 
This way is easier to maintain and ease ClickHouse upgrades. **/etc/clickhouse-server/users.d** – sub-folder for user settings. @@ -207,7 +207,7 @@ $ cat /etc/clickhouse-server/config.d/dictionaries.xml ``` -**dict/\*.xml** – relative path, servers seeks files in the folder **/etc/clickhouse-server/dict**. More info in [Multiple Clickhouse instances](altinity-kb-server-config-files.md#Multiple-Clickhouse-instances). +**dict/\*.xml** – relative path, servers seeks files in the folder **/etc/clickhouse-server/dict**. More info in [Multiple ClickHouse instances](#Multiple-ClickHouse-instances-at-one-host). ## incl attribute & metrica.xml @@ -264,16 +264,16 @@ $ cat /etc/clickhouse-server/dict/country.xml ``` -## Multiple Clickhouse instances at one host +## Multiple ClickHouse instances at one host -By default Clickhouse server configs are in **/etc/clickhouse-server/** because clickhouse-server runs with a parameter **--config-file /etc/clickhouse-server/config.xml** +By default ClickHouse server configs are in **/etc/clickhouse-server/** because clickhouse-server runs with a parameter **--config-file /etc/clickhouse-server/config.xml** **config-file** is defined in startup scripts: * **/etc/init.d/clickhouse-server** – init-V * **/etc/systemd/system/clickhouse-server.service** – systemd -Clickhouse uses the path from **config-file** parameter as base folder and seeks for other configs by relative path. All sub-folders **users.d / config.d** are relative. +ClickHouse uses the path from **config-file** parameter as base folder and seeks for other configs by relative path. All sub-folders **users.d / config.d** are relative. You can start multiple **clickhouse-server** each with own **--config-file.** @@ -318,7 +318,7 @@ By default ClickHouse uses **/var/lib/clickhouse/**. It can be overridden in pat ## preprocessed_configs -Clickhouse server watches config files and folders. 
When you change, add or remove XML files Clickhouse immediately assembles XML files into a combined file. These combined files are stored in **/var/lib/clickhouse/preprocessed_configs/** folders. +ClickHouse server watches config files and folders. When you change, add or remove XML files ClickHouse immediately assembles XML files into a combined file. These combined files are stored in **/var/lib/clickhouse/preprocessed_configs/** folders. You can verify that your changes are valid by checking **/var/lib/clickhouse/preprocessed_configs/config.xml**, **/var/lib/clickhouse/preprocessed_configs/users.xml**. diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-system-tables-eat-my-disk.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-system-tables-eat-my-disk.md index 73559054e7..7099b75701 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-system-tables-eat-my-disk.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-system-tables-eat-my-disk.md @@ -8,11 +8,11 @@ description: > > > Virtual tables do not persist on disk. They reflect ClickHouse memory (c++ structures). They cannot be changed or removed. > -> Log tables are named with postfix \***_log** and have the MergeTree engine. Clickhouse does not use information stored in these tables, this data is for you only. +> Log tables are named with postfix \***_log** and have the MergeTree engine. ClickHouse does not use information stored in these tables, this data is for you only. > > You can drop / rename / truncate \***_log** tables at any time. ClickHouse will recreate them in about 7 seconds (flush period). -> **Note 2:** Log tables with numeric postfixes (_1 / 2 / 3 ...) `query_log_1 query_thread_log_3` are results of Clickhouse upgrades. 
When a new version of Clickhouse starts and discovers that a system log table's schema is incompatible with a new schema, then Clickhouse renames the old *_log table to the name with the prefix and creates a table with the new schema. You can drop such tables if you don't need such historic data. +> **Note 2:** Log tables with numeric postfixes (_1 / 2 / 3 ...) `query_log_1 query_thread_log_3` are results of ClickHouse upgrades. When a new version of ClickHouse starts and discovers that a system log table's schema is incompatible with a new schema, then ClickHouse renames the old *_log table to the name with the prefix and creates a table with the new schema. You can drop such tables if you don't need such historic data. ## You can disable all / any of them diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/clickhouse-keeper.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/clickhouse-keeper.md index 6c7e6c07aa..64576cebb6 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/clickhouse-keeper.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/clickhouse-keeper.md @@ -30,7 +30,7 @@ if you want to play with clickhouse-keeper in some environment - please use the Official docs: https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper/ -Clickhouse-keeper still need to be started additionally on few nodes (similar to 'normal' zookeeper) and speaks normal zookeeper protocol - needed to simplify A/B tests with real zookeeper. +ClickHouse-keeper still need to be started additionally on few nodes (similar to 'normal' zookeeper) and speaks normal zookeeper protocol - needed to simplify A/B tests with real zookeeper. 
To test that you need to run 3 instances of clickhouse-server (which will mimic zookeeper) with an extra config like that: @@ -55,9 +55,9 @@ https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/ See https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/clickhouse-keeper-initd/ -## Example of a simple cluster with 2 nodes of Clickhouse using built-in keeper +## Example of a simple cluster with 2 nodes of ClickHouse using built-in keeper -For example you can start two Clikhouse nodes (hostname1, hostname2) +For example you can start two ClickHouse nodes (hostname1, hostname2) ### hostname1 diff --git a/content/en/altinity-kb-setup-and-maintenance/ch-logs-2-json-vectordev.md b/content/en/altinity-kb-setup-and-maintenance/ch-logs-2-json-vectordev.md index 0b18b897a4..a4e03ce42e 100644 --- a/content/en/altinity-kb-setup-and-maintenance/ch-logs-2-json-vectordev.md +++ b/content/en/altinity-kb-setup-and-maintenance/ch-logs-2-json-vectordev.md @@ -1,9 +1,9 @@ --- -title: "Transformation Clickhouse logs to ndjson using Vector.dev" -linkTitle: "Clickhouse logs and Vector.dev" +title: "Transforming ClickHouse logs to ndjson using Vector.dev" +linkTitle: "ClickHouse logs and Vector.dev" weight: 100 description: >- - Transformation Clickhouse logs to ndjson using Vector.dev + Transforming ClickHouse logs to ndjson using Vector.dev --- ### ClickHouse 22.8 @@ -27,7 +27,7 @@ Starting from 22.8 version, ClickHouse support writing logs in JSON format: ``` -## Transformation Clickhouse logs to ndjson using Vector.dev" +## Transforming ClickHouse logs to ndjson using Vector.dev" ### Installation of vector.dev diff --git a/content/en/altinity-kb-setup-and-maintenance/disk_encryption.md b/content/en/altinity-kb-setup-and-maintenance/disk_encryption.md index 85e51aef4e..73d3a79cf4 100644 --- a/content/en/altinity-kb-setup-and-maintenance/disk_encryption.md +++ b/content/en/altinity-kb-setup-and-maintenance/disk_encryption.md @@ -1,5 
+1,5 @@ --- -title: "Clickhouse data/disk encryption (at rest)" +title: "ClickHouse data/disk encryption (at rest)" linkTitle: "disk encryption" weight: 100 description: >- diff --git a/content/en/altinity-kb-setup-and-maintenance/filesystems.md b/content/en/altinity-kb-setup-and-maintenance/filesystems.md index c39554e823..8e030ead5c 100644 --- a/content/en/altinity-kb-setup-and-maintenance/filesystems.md +++ b/content/en/altinity-kb-setup-and-maintenance/filesystems.md @@ -41,7 +41,7 @@ Limitations exist, extra tuning may be needed, and having more RAM is recommende Memory usage control - ZFS adaptive replacement cache (ARC) can take a lot of RAM. It can be the reason of out-of-memory issues when memory is also requested by the ClickHouse. -* It seems that the most important thing is zfs_arc_max - you just need to limit the maximum size of the ARC so that the sum of the maximum size of the arc + the CH itself does not exceed the size of the available RAM. For example, we set a limit of 80% RAM for Clickhouse and 10% for ARC. 10% will remain for the system and other applications +* It seems that the most important thing is zfs_arc_max - you just need to limit the maximum size of the ARC so that the sum of the maximum size of the arc + the CH itself does not exceed the size of the available RAM. For example, we set a limit of 80% RAM for ClickHouse and 10% for ARC. 
10% will remain for the system and other applications Tuning: * another potentially interesting setting is primarycache=metadata, see benchmark example: https://www.ikus-soft.com/en/blog/2018-05-23-proxmox-primarycache-all-metadata/ diff --git a/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/_index.md b/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/_index.md index 8da7e266b7..35b95f7dba 100644 --- a/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/_index.md +++ b/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/_index.md @@ -13,12 +13,12 @@ description: > * Flyway * [Official community supported plugin](https://documentation.red-gate.com/flyway/flyway-cli-and-api/supported-databases/clickhouse-database) [git](https://github.com/flyway/flyway-community-db-support/tree/main/flyway-database-clickhouse) * Old pull requests (latest at the top): - * [https://github.com/flyway/flyway/pull/3333](https://github.com/flyway/flyway/pull/3333) Сlickhouse support - * [https://github.com/flyway/flyway/pull/3134](https://github.com/flyway/flyway/pull/3134) Сlickhouse support - * [https://github.com/flyway/flyway/pull/3133](https://github.com/flyway/flyway/pull/3133) Add support clickhouse - * [https://github.com/flyway/flyway/pull/2981](https://github.com/flyway/flyway/pull/2981) Clickhouse replicated + * [https://github.com/flyway/flyway/pull/3333](https://github.com/flyway/flyway/pull/3333) ClickHouse support + * [https://github.com/flyway/flyway/pull/3134](https://github.com/flyway/flyway/pull/3134) ClickHouse support + * [https://github.com/flyway/flyway/pull/3133](https://github.com/flyway/flyway/pull/3133) Add support ClickHouse + * [https://github.com/flyway/flyway/pull/2981](https://github.com/flyway/flyway/pull/2981) ClickHouse replicated * [https://github.com/flyway/flyway/pull/2640](https://github.com/flyway/flyway/pull/2640) Yet another ClickHouse support - * 
[https://github.com/flyway/flyway/pull/2166](https://github.com/flyway/flyway/pull/2166) Clickhouse support (\#1772) + * [https://github.com/flyway/flyway/pull/2166](https://github.com/flyway/flyway/pull/2166) ClickHouse support (\#1772) * [https://github.com/flyway/flyway/pull/1773](https://github.com/flyway/flyway/pull/1773) Fixed \#1772: Add support for ClickHouse ([https://clickhouse.yandex/](https://clickhouse.yandex/)) * [alembic](https://alembic.sqlalchemy.org/en/latest/) * see https://clickhouse-sqlalchemy.readthedocs.io/en/latest/migrations.html @@ -29,7 +29,7 @@ description: > * [https://github.com/zifter/clickhouse-migrations](https://github.com/zifter/clickhouse-migrations) * [https://github.com/trushad0w/clickhouse-migrate](https://github.com/trushad0w/clickhouse-migrate) * phpMigrations - * [https://github.com/smi2/phpMigrationsClickhouse](https://github.com/smi2/phpMigrationsClickhouse) + * [https://github.com/smi2/phpMigrationsClickhouse](https://github.com/smi2/phpMigrationsClickhouse) * [https://habrahabr.ru/company/smi2/blog/317682/](https://habrahabr.ru/company/smi2/blog/317682/) * dbmate * [https://github.com/amacneil/dbmate#clickhouse](https://github.com/amacneil/dbmate#clickhouse) diff --git a/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/golang-migrate.md b/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/golang-migrate.md index a1d21e4675..c08be391ce 100644 --- a/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/golang-migrate.md +++ b/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/golang-migrate.md @@ -65,7 +65,7 @@ migrate -database 'clickhouse://localhost:9000' -path ./migrations down #### Replicated / Distributed / Cluster environments -`golang-migrate` supports a clustered Clickhouse environment since v4.15.0. +`golang-migrate` supports a clustered ClickHouse environment since v4.15.0. 
If you provide `x-cluster-name` query param, it will create the table to store migration data on the passed cluster. diff --git a/content/en/altinity-kb-setup-and-maintenance/uniqExact-to-uniq-combined.md b/content/en/altinity-kb-setup-and-maintenance/uniqExact-to-uniq-combined.md index 02e3de972b..9a8c835bdc 100644 --- a/content/en/altinity-kb-setup-and-maintenance/uniqExact-to-uniq-combined.md +++ b/content/en/altinity-kb-setup-and-maintenance/uniqExact-to-uniq-combined.md @@ -3,7 +3,7 @@ title: "How to convert uniqExact states to approximate uniq functions states" linkTitle: "Convert uniqExact to uniq(Combined)" weight: 100 description: >- - A way to convert to uniqExactState to other uniqStates (like uniqCombinedState) in Clickhouse. + A way to convert uniqExactState to other uniqStates (like uniqCombinedState) in ClickHouse. --- ## uniqExactState diff --git a/content/en/altinity-kb-setup-and-maintenance/who-ate-my-cpu.md b/content/en/altinity-kb-setup-and-maintenance/who-ate-my-cpu.md index d5a3ed48fd..f61a1371f7 100644 --- a/content/en/altinity-kb-setup-and-maintenance/who-ate-my-cpu.md +++ b/content/en/altinity-kb-setup-and-maintenance/who-ate-my-cpu.md @@ -3,7 +3,7 @@ title: "Who ate my CPU" linkTitle: "Who ate my CPU" weight: 100 description: >- - Queries to find which subsytem of Clickhouse is using the most of CPU. + Queries to find which subsystem of ClickHouse is using the most CPU. 
--- ## Merges diff --git a/content/en/altinity-kb-useful-queries/altinity-kb-number-of-active-parts-in-a-partition.md b/content/en/altinity-kb-useful-queries/altinity-kb-number-of-active-parts-in-a-partition.md index bfb793dbba..184893b938 100644 --- a/content/en/altinity-kb-useful-queries/altinity-kb-number-of-active-parts-in-a-partition.md +++ b/content/en/altinity-kb-useful-queries/altinity-kb-number-of-active-parts-in-a-partition.md @@ -4,7 +4,7 @@ linkTitle: "Number of active parts in a partition" description: > Number of active parts in a partition --- -## Q: Why do I have several active parts in a partition? Why Clickhouse does not merge them immediately? +## Q: Why do I have several active parts in a partition? Why ClickHouse does not merge them immediately? ### A: CH does not merge parts by time diff --git a/content/en/altinity-kb-useful-queries/query_log.md b/content/en/altinity-kb-useful-queries/query_log.md index bef2e20da8..5cde20648e 100644 --- a/content/en/altinity-kb-useful-queries/query_log.md +++ b/content/en/altinity-kb-useful-queries/query_log.md @@ -40,7 +40,7 @@ LIMIT 30 FORMAT Vertical --- modern Clickhouse +-- modern ClickHouse SELECT hostName() as host, normalized_query_hash, diff --git a/content/en/engines/_index.md b/content/en/engines/_index.md index 83fce3947a..53e16cbb13 100644 --- a/content/en/engines/_index.md +++ b/content/en/engines/_index.md @@ -5,10 +5,10 @@ keywords: - clickhouse engine - clickhouse mergetree description: > - Learn about ClickHouse engines, from MergeTree, Atomic Database to RocksDB. + Learn about ClickHouse® engines, from MergeTree, Atomic Database to RocksDB. weight: 1 --- -Generally: the **main** engine in Clickhouse is called [MergeTree](/engines/mergetree-table-engine-family/). It allows to store and process data on one server and feel all the advantages of Clickhouse. Basic usage of MergeTree does not require any special configuration, and you can start using it 'out of the box'. 
+Generally: the **main** engine in ClickHouse® is called [MergeTree](/engines/mergetree-table-engine-family/). It allows to store and process data on one server and feel all the advantages of ClickHouse. Basic usage of MergeTree does not require any special configuration, and you can start using it 'out of the box'. But one server and one copy of data are not fault-tolerant - something can happen with the server itself, with datacenter availability, etc. So you need to have the replica(s) - i.e. server(s) with the same data and which can 'substitute' the original server at any moment. @@ -16,9 +16,9 @@ To have an extra copy (replica) of your data you need to use [ReplicatedMergeTre [ZooKeeper installed](https://docs.altinity.com/operationsguide/clickhouse-zookeeper/zookeeper-installation/) and running. For tests, you can use one standalone Zookeeper instance, but for production usage, you should have zookeeper ensemble at least of 3 servers. -When you use ReplicatedMergeTree then the inserted data is copied automatically to all the replicas, but all the SELECTs are executed on the single server you have connected to. So you can have 5 replicas of your data, but if you will always connect to one replica - it will not 'share' / 'balance' that traffic automatically between all the replicas, one server will be loaded and the rest will generally do nothing. If you need that balancing of load between multiple replicas - you can use the internal 'loadbalancer' mechanism which is provided by Distributed engine of Clickhouse. As an alternative in that scenario you can work without [Distributed table](/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/distributed-table-cluster/), but with some external load balancer that will balance the requests between several replicas according to your specific rules or preferences, or just cluster-aware client which will pick one of the servers for the query time. 
+When you use ReplicatedMergeTree then the inserted data is copied automatically to all the replicas, but all the SELECTs are executed on the single server you have connected to. So you can have 5 replicas of your data, but if you will always connect to one replica - it will not 'share' / 'balance' that traffic automatically between all the replicas, one server will be loaded and the rest will generally do nothing. If you need that balancing of load between multiple replicas - you can use the internal 'loadbalancer' mechanism which is provided by Distributed engine of ClickHouse. As an alternative in that scenario you can work without [Distributed table](/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/distributed-table-cluster/), but with some external load balancer that will balance the requests between several replicas according to your specific rules or preferences, or just cluster-aware client which will pick one of the servers for the query time. -The Distributed engine does not store any data, but it can 'point' to the same ReplicatedMergeTree/MergeTree table on multiple servers. To use Distributed engine you need to configure `` settings in your ClickHouse server config file. +The Distributed engine does not store any data, but it can 'point' to the same ReplicatedMergeTree/MergeTree table on multiple servers. To use Distributed engine you need to configure `` settings in your ClickHouse server config file. So let's say you have 3 replicas of table `my_replicated_data` with ReplicatedMergeTree engine. You can create a table with Distributed engine called `my_distributed_replicated_data` which will 'point' to all of that 3 servers, and when you will select from that `my_distributed_replicated_data table` the select will be forwarded and executed on one of the replicas. So in that scenario, each replica will get 1/3 of requests (but each request still will be fully executed on one chosen replica). 
diff --git a/content/en/engines/altinity-kb-atomic-database-engine/_index.md b/content/en/engines/altinity-kb-atomic-database-engine/_index.md index 59e962fe07..62ff1d096d 100644 --- a/content/en/engines/altinity-kb-atomic-database-engine/_index.md +++ b/content/en/engines/altinity-kb-atomic-database-engine/_index.md @@ -6,7 +6,7 @@ description: > aliases: /engines/altinity-kb-atomic-database-engine/altinity-kb-implementation-details --- -In version 20.5 ClickHouse first introduced database engine=Atomic. +In version 20.5 ClickHouse® first introduced database engine=Atomic. Since version 20.10 it is a default database engine (before engine=Ordinary was used). @@ -68,9 +68,9 @@ SHOW CREATE TABLE xxx; /* or SELECT create_table_query FROM system.tables WHERE ### Q. Should I use Atomic or Ordinary for new setups? -All things inside clickhouse itself should work smoothly with `Atomic`. +All things inside ClickHouse itself should work smoothly with `Atomic`. -But some external tools - backup tools, things involving other kinds of direct manipulations with clickhouse files & folders may have issues with `Atomic`. +But some external tools - backup tools, things involving other kinds of direct manipulations with ClickHouse files & folders may have issues with `Atomic`. `Ordinary` layout on the filesystem is simpler. And the issues which address Atomic (lock-free renames, drops, atomic exchange of table) are not so critical in most cases. @@ -90,7 +90,7 @@ But some external tools - backup tools, things involving other kinds of direct m +
(like clickhouse-backup)
diff --git a/content/en/engines/altinity-kb-atomic-database-engine/how-to-convert-ordinary-to-atomic.md b/content/en/engines/altinity-kb-atomic-database-engine/how-to-convert-ordinary-to-atomic.md index fa70bce42a..d92b53960c 100644 --- a/content/en/engines/altinity-kb-atomic-database-engine/how-to-convert-ordinary-to-atomic.md +++ b/content/en/engines/altinity-kb-atomic-database-engine/how-to-convert-ordinary-to-atomic.md @@ -3,12 +3,12 @@ title: "How to Convert Ordinary to Atomic" linkTitle: "How to Convert Ordinary to Atomic" weight: 100 description: >- - Clickhouse Howto Convert Ordinary to Atomic + ClickHouse® How to Convert Ordinary to Atomic --- ## New, official way -* Implemented automatic conversion of database engine from `Ordinary` to `Atomic` (ClickHouse Server 22.8+). Create empty `convert_ordinary_to_atomic` file in `flags` directory and all `Ordinary` databases will be converted automatically on next server start. Resolves [#39546](https://github.com/ClickHouse/ClickHouse/issues/39546). [#39933](https://github.com/ClickHouse/ClickHouse/pull/39933) ([Alexander Tokmakov](https://github.com/tavplubix)) +* Implemented automatic conversion of database engine from `Ordinary` to `Atomic` (ClickHouse® Server 22.8+). Create empty `convert_ordinary_to_atomic` file in `flags` directory and all `Ordinary` databases will be converted automatically on next server start. Resolves [#39546](https://github.com/ClickHouse/ClickHouse/issues/39546). [#39933](https://github.com/ClickHouse/ClickHouse/pull/39933) ([Alexander Tokmakov](https://github.com/tavplubix)) * There can be some problems if the `default` database is Ordinary and fails for some reason. 
You can add: diff --git a/content/en/engines/mergetree-table-engine-family/altinity-kb-nulls-in-order-by.md b/content/en/engines/mergetree-table-engine-family/altinity-kb-nulls-in-order-by.md index c78db16b52..b6a9e80fbc 100644 --- a/content/en/engines/mergetree-table-engine-family/altinity-kb-nulls-in-order-by.md +++ b/content/en/engines/mergetree-table-engine-family/altinity-kb-nulls-in-order-by.md @@ -7,7 +7,7 @@ description: > 1) It is NOT RECOMMENDED for a general use 2) Use on your own risk -3) Use latest ClickHouse version if you need that. +3) Use latest ClickHouse® version if you need that. ```sql CREATE TABLE x diff --git a/content/en/engines/mergetree-table-engine-family/collapsing-vs-replacing.md b/content/en/engines/mergetree-table-engine-family/collapsing-vs-replacing.md index dc71219c67..27abdd650d 100644 --- a/content/en/engines/mergetree-table-engine-family/collapsing-vs-replacing.md +++ b/content/en/engines/mergetree-table-engine-family/collapsing-vs-replacing.md @@ -11,7 +11,7 @@ description: >- | ReplacingMergeTree | CollapsingMergeTree | |:-|:-| | + very easy to use (always replace) | - more complex (accounting-alike, put 'rollback' records to fix something) | -| + you don't need to store the previous state of the row | - you need to the store (somewhere) the previous state of the row, OR extract it from the table itself (point queries is not nice for ClickHouse) | +| + you don't need to store the previous state of the row | - you need to store (somewhere) the previous state of the row, OR extract it from the table itself (point queries are not nice for ClickHouse®) | | - no deletes | + support deletes | | - w/o FINAL - you can can always see duplicates, you need always to 'pay' FINAL performance penalty | + properly crafted query can give correct results without final (i.e. 
`sum(amount * sign)` will be correct, no matter of you have duplicated or not) | | - only `uniq()`-alike things can be calculated in materialied views | + you can do basic counts & sums in materialized views | diff --git a/content/en/engines/mergetree-table-engine-family/merge-performance-final-optimize-by.md b/content/en/engines/mergetree-table-engine-family/merge-performance-final-optimize-by.md index 6130826acc..990d434b36 100644 --- a/content/en/engines/mergetree-table-engine-family/merge-performance-final-optimize-by.md +++ b/content/en/engines/mergetree-table-engine-family/merge-performance-final-optimize-by.md @@ -41,7 +41,7 @@ When using [deduplicate](/altinity-kb-schema-design/row-level-deduplication/) feature in `OPTIMIZE FINAL`, the question is which row will remain and won't be deduped? -For SELECT operations Clickhouse does not guarantee the order of the resultset unless you specify ORDER BY. This random ordering is affected by different parameters, like for example `max_threads`. +For SELECT operations ClickHouse® does not guarantee the order of the resultset unless you specify ORDER BY. This random ordering is affected by different parameters, like for example `max_threads`. In a merge operation ClickHouse reads rows sequentially in storage order, which is determined by ORDER BY specified in CREATE TABLE statement, and only the first unique row in that order survives deduplication. So it is a bit different from how SELECT actually works. As FINAL clause is used then ClickHouse will merge all rows across all partitions (If it is not specified then the merge operation will be done per partition), and so the first unique row of the first partition will survive deduplication. Merges are single-threaded because it is too complicated to apply merge ops in-parallel, and it generally makes no sense. 
diff --git a/content/en/engines/mergetree-table-engine-family/part-naming-and-mvcc.md b/content/en/engines/mergetree-table-engine-family/part-naming-and-mvcc.md index 3b3d4a4db4..27e0abbca8 100644 --- a/content/en/engines/mergetree-table-engine-family/part-naming-and-mvcc.md +++ b/content/en/engines/mergetree-table-engine-family/part-naming-and-mvcc.md @@ -50,7 +50,7 @@ As you can see every insert creates a new incremental block_number which is writ Those block numbering works in the scope of partition (for Replicated table) or globally across all partition (for plain MergeTree table). -ClickHouse always merge only continuous blocks . And new part names always refer to the minimum and maximum block numbers. +ClickHouse® always merge only continuous blocks . And new part names always refer to the minimum and maximum block numbers. ``` OPTIMIZE TABLE part_names; @@ -63,7 +63,7 @@ OPTIMIZE TABLE part_names; As you can see here - three parts (with block number 1,2,3) were merged and they formed the new part with name 1_3 as min/max block size. Level get incremented. -Now even while previous (merged) parts still exists in filesystem for a while (as inactive) clickhouse is smart enough to understand +Now even while previous (merged) parts still exists in filesystem for a while (as inactive) ClickHouse is smart enough to understand that new part 'covers' same range of blocks as 3 parts of the prev 'generation' There might be a fifth section in the part name, data version. 
diff --git a/content/en/engines/mergetree-table-engine-family/pick-keys.md b/content/en/engines/mergetree-table-engine-family/pick-keys.md index 2899df3181..7343f261c1 100644 --- a/content/en/engines/mergetree-table-engine-family/pick-keys.md +++ b/content/en/engines/mergetree-table-engine-family/pick-keys.md @@ -1,5 +1,5 @@ --- -title: "How to pick an ORDER BY / PRIMARY KEY / PARTITION BY for the MergeTree-family table" +title: "How to pick an ORDER BY / PRIMARY KEY / PARTITION BY for the MergeTree family table" linkTitle: "Proper ordering and partitioning the MergeTree tables" weight: 100 description: >- @@ -15,7 +15,7 @@ Practical approach to create an good ORDER BY for a table: 3. Next column is more cardinal, less important. It can be rounded time sometimes, or `site_id`, or `source_id`, or `group_id` or something similar. 4. repeat p.3 once again (or few times) 5. if you added already all columns important for filtering and you still not addressing a single row with you pk - you can add more columns which can help to put similar records close to each other (to improve the compression) -6. if you have something like hierarchy / tree-like relations between the columns - put there the records from 'root' to 'leaves' for example (continent, country, cityname). This way clickhouse can do lookup by country / city even if continent is not specified (it will just 'check all continents') +6. if you have something like hierarchy / tree-like relations between the columns - put there the records from 'root' to 'leaves' for example (continent, country, cityname). This way ClickHouse® can do lookup by country / city even if continent is not specified (it will just 'check all continents') special variants of MergeTree may require special ORDER BY to make the record unique etc. 7. 
For [timeseries](https://altinity.com/blog/2019-5-23-handling-variable-time-series-efficiently-in-clickhouse) it usually make sense to put timestamp as latest column in ORDER BY, it helps with putting the same data near by for better locality. There is only 2 major patterns for timestamps in ORDER BY: (..., toStartOf(Day|Hour|...)(timestamp), ..., timestamp) and (..., timestamp). First one is useful when your often query small part of table partition. (table partitioned by months and your read only 1-4 days 90% of times) @@ -137,7 +137,7 @@ Ok. 0 rows in set. Elapsed: 0.649 sec. Processed 125.97 million rows, 629.86 MB (194.17 million rows/s., 970.84 MB/s.) ``` -If we change the `ORDER BY` expression in the query, Clickhouse will need to retrieve the rows and reorder them: +If we change the `ORDER BY` expression in the query, ClickHouse will need to retrieve the rows and reorder them: ```sql SELECT * FROM order_test diff --git a/content/en/engines/mergetree-table-engine-family/replacingmergetree/_index.md b/content/en/engines/mergetree-table-engine-family/replacingmergetree/_index.md index 2ca4e29dbc..df2cf33668 100644 --- a/content/en/engines/mergetree-table-engine-family/replacingmergetree/_index.md +++ b/content/en/engines/mergetree-table-engine-family/replacingmergetree/_index.md @@ -7,7 +7,7 @@ aliases: /engines/replacingmergetree --- [ReplacingMergeTree](https://altinity.com/blog/clickhouse-replacingmergetree-explained-the-good-the-bad-and-the-ugly) -is a powerful ClickHouse MergeTree engine. It is one of the techniques that can be used to guarantee unicity or exactly once delivery in ClickHouse. +is a powerful ClickHouse® MergeTree engine. It is one of the techniques that can be used to guarantee unicity or exactly once delivery in ClickHouse. 
## General Operations @@ -39,7 +39,7 @@ See these links for reference: Since 23.2, profile level ```final=1``` can force final automatically, see https://github.com/ClickHouse/ClickHouse/pull/40945 -Clickhouse merge parts only in scope of single partition, so if two rows with the same replacing key would land in different partitions, they would **never** be merged in single row. FINAL keyword works in other way, it merge all rows across all partitions. But that behavior can be changed via`do_not_merge_across_partitions_select_final` setting. +ClickHouse merge parts only in scope of single partition, so if two rows with the same replacing key would land in different partitions, they would **never** be merged in single row. FINAL keyword works in other way, it merge all rows across all partitions. But that behavior can be changed via`do_not_merge_across_partitions_select_final` setting. ```sql CREATE TABLE repl_tbl_part diff --git a/content/en/engines/mergetree-table-engine-family/replacingmergetree/altinity-kb-replacingmergetree-does-not-collapse-duplicates.md b/content/en/engines/mergetree-table-engine-family/replacingmergetree/altinity-kb-replacingmergetree-does-not-collapse-duplicates.md index 4b509b2b19..4705ebe5ae 100644 --- a/content/en/engines/mergetree-table-engine-family/replacingmergetree/altinity-kb-replacingmergetree-does-not-collapse-duplicates.md +++ b/content/en/engines/mergetree-table-engine-family/replacingmergetree/altinity-kb-replacingmergetree-does-not-collapse-duplicates.md @@ -11,7 +11,7 @@ with ReplacingMergeTree table, but even if I call optimize on it, the parts don' Merges are eventual and may never happen. It depends on the number of inserts that happened after, the number of parts in the partition, size of parts. If the total size of input parts are greater than the maximum part size then they will never be merged. 
-[https://clickhouse.tech/docs/en/operations/settings/merge-tree-settings/\#max-bytes-to-merge-at-max-space-in-pool](https://clickhouse.tech/docs/en/operations/settings/merge-tree-settings/#max-bytes-to-merge-at-max-space-in-pool) +[https://clickhouse.com/docs/en/operations/settings/merge-tree-settings#max-bytes-to-merge-at-max-space-in-pool](https://clickhouse.com/docs/en/operations/settings/merge-tree-settings#max-bytes-to-merge-at-max-space-in-pool) -[https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/replacingmergetree/](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/replacingmergetree/) +[https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replacingmergetree](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replacingmergetree) _ReplacingMergeTree is suitable for clearing out duplicate data in the background in order to save space, but it doesn’t guarantee the absence of duplicates._ diff --git a/content/en/engines/mergetree-table-engine-family/skip-index.md b/content/en/engines/mergetree-table-engine-family/skip-index.md index 4bcb0d5951..b0910a03c0 100644 --- a/content/en/engines/mergetree-table-engine-family/skip-index.md +++ b/content/en/engines/mergetree-table-engine-family/skip-index.md @@ -7,7 +7,7 @@ description: > {{% alert title="Warning" color="warning" %}} When you are creating [skip indexes](https://altinity.com/blog/clickhouse-black-magic-skipping-indices) -in non-regular (Replicated)MergeTree tables over non ORDER BY columns. ClickHouse applies index condition on the first step of query execution, so it's possible to get outdated rows. +in non-regular (Replicated)MergeTree tables over non ORDER BY columns. ClickHouse® applies index condition on the first step of query execution, so it's possible to get outdated rows. 
{{% /alert %}} ```sql diff --git a/content/en/engines/mergetree-table-engine-family/versioned-collapsing-mergetree.md b/content/en/engines/mergetree-table-engine-family/versioned-collapsing-mergetree.md index ea9148164f..e2fcb259f2 100644 --- a/content/en/engines/mergetree-table-engine-family/versioned-collapsing-mergetree.md +++ b/content/en/engines/mergetree-table-engine-family/versioned-collapsing-mergetree.md @@ -6,14 +6,14 @@ description: How to aggregate mutating event stream with duplicates ### Challenges with mutated data -When you have an incoming event stream with duplicates, updates, and deletes, building a consistent row state inside the Clickhouse table is a big challenge. +When you have an incoming event stream with duplicates, updates, and deletes, building a consistent row state inside the ClickHouse® table is a big challenge. -The UPDATE/DELETE approach in the OLTP world won’t help with OLAP databases tuned to handle big batches. UPDATE/DELETE operations in Clickhouse are executed as “mutations,” rewriting a lot of data and being relatively slow. You can’t run such operations very often, as for OLTP databases. But the UPSERT operation (insert and replace) runs fast with the ReplacingMergeTree Engine. It’s even set as the default mode for INSERT without any special keyword. We can emulate UPDATE (or even DELETE) with the UPSERT operation. +The UPDATE/DELETE approach in the OLTP world won’t help with OLAP databases tuned to handle big batches. UPDATE/DELETE operations in ClickHouse are executed as “mutations,” rewriting a lot of data and being relatively slow. You can’t run such operations very often, as for OLTP databases. But the UPSERT operation (insert and replace) runs fast with the ReplacingMergeTree Engine. It’s even set as the default mode for INSERT without any special keyword. We can emulate UPDATE (or even DELETE) with the UPSERT operation. 
There are a lot of [blog posts](https://altinity.com/blog/clickhouse-replacingmergetree-explained-the-good-the-bad-and-the-ugly) on how to use ReplacingMergeTree Engine to handle mutated data streams. A properly designed table schema with ReplacingMergeTree Engine is a good instrument for building the DWH Dimensions table. But when maintaining metrics in Fact tables, there are several problems: -- it’s not possible to use a valuable Clickhouse feature - online aggregation of incoming data by Materialized Views or Projections on top of the ReplacingMT table, because duplicates and updates will not be deduplicated by the engine during inserts, and calculated aggregates (like sum or count) will be incorrect. For significant amounts of data, it’s become critical because aggregating raw data during report queries will take too much time. -- unfinished support for DELETEs. While in the newest versions of Clickhouse, it’s possible to add the is_deleted to ReplacingMergeTree parameters, the necessity of manually filtering out deleted rows after FINAL processing makes that feature less useful. +- it’s not possible to use a valuable ClickHouse feature - online aggregation of incoming data by Materialized Views or Projections on top of the ReplacingMT table, because duplicates and updates will not be deduplicated by the engine during inserts, and calculated aggregates (like sum or count) will be incorrect. For significant amounts of data, it’s become critical because aggregating raw data during report queries will take too much time. +- unfinished support for DELETEs. While in the newest versions of ClickHouse, it’s possible to add the is_deleted to ReplacingMergeTree parameters, the necessity of manually filtering out deleted rows after FINAL processing makes that feature less useful. - Mutated data should be localized to the same partition. 
If the “replacing” row is saved to a partition different from the previous one, the report query will be much slower or produce unexpected results. ```sql @@ -40,7 +40,7 @@ You will get a row with ‘first’, not an empty set, as one might expect with ### Collapsing -Clickhouse has other table engines, such as CollapsingMergeTree and VersionedCollapsingMergeTree, that can be used even better for UPSERT operation. +ClickHouse has other table engines, such as CollapsingMergeTree and VersionedCollapsingMergeTree, that can be used even better for UPSERT operation. Both work by inserting a “rollback row” to compensate for the previous insert. The difference between CollapsingMergeTree and VersionedCollapsingMergeTree is in the algorithm of collapsing. For Cluster configurations, it’s essential to understand which row came first and who should replace whom. That is why using ReplicatedVersionedCollapsingMergeTree is mandatory for Replicated Clusters. @@ -89,7 +89,7 @@ With VersionedCollapsingMergeTree, we can use more partition strategies, even wi ### Row deduplication -There are several ways to remove duplicates from the event stream. The most effective feature is block deduplication, which occurs when Clickhouse drops incoming blocks with the same checksum (or tag). However, this requires building a smart ingestor capable of saving positions in a transactional manner. +There are several ways to remove duplicates from the event stream. The most effective feature is block deduplication, which occurs when ClickHouse drops incoming blocks with the same checksum (or tag). However, this requires building a smart ingestor capable of saving positions in a transactional manner. However, another method is possible: verifying whether a particular row already exists in the destination table to avoid redundant insertions. Together with block deduplication, that method also avoids using ReplacingMergeTree and FINAL during query time. 
@@ -145,7 +145,7 @@ I read more data from the Example2 table than from Example1. Instead of simply c For UPSERT, the collapsing algorithm requires inserting two rows. So, I need to create two rows from any row that is found in the local table. It´s an essential part of the suggested approach, which allows me to produce proper rows for inserting with a human-readable code with clear if() statements. That is why I execute arrayJoin while reading old data. -Don’t try to run the code above. It’s just a short explanation of the idea, lucking many needed elements. +Don’t try to run the code above. It’s just a short explanation of the idea, lacking many needed elements. ### UPSERT by Collapsing @@ -334,7 +334,7 @@ format Null select '---',timeSpent(),'GROUP BY OPTIMIZED'; ``` -You can use fiddle or clickhouse-local to run such a test: +You can use fiddle or `clickhouse-local` to run such a test: ```bash cat test.sql | clickhouse-local -nm @@ -551,7 +551,7 @@ However, it can lead to incorrect duplicate processing in some rare situations. - two events happen in the source database (insert and delete) for the very same ID - only insert event create a duplicate (delete event does not duplicate) - all 3 events (delete and two inserts) were processed in separate batches -- Clickhouse executes the merge operation very quickly after the first INSER and DELETE events are received, effectively removing the row with that ID from the table +- ClickHouse executes the merge operation very quickly after the first INSERT and DELETE events are received, effectively removing the row with that ID from the table - the second (duplicated) insert is saved to the table because we lost the information about the first insertion The probability of such a sequence is relatively low, especially in normal operations when the amount of DELETEs is not too significant. Processing events in big batches will reduce the probability even more. 
diff --git a/content/en/upgrade/clickhouse-feature-report.md b/content/en/upgrade/clickhouse-feature-report.md index 6742d5a171..ee34fcf435 100644 --- a/content/en/upgrade/clickhouse-feature-report.md +++ b/content/en/upgrade/clickhouse-feature-report.md @@ -1,8 +1,8 @@ --- -title: "Clickhouse Function/Engines/Settings Report" -linkTitle: "Clickhouse Function/Engines/Settings Report" +title: "ClickHouse® Function/Engines/Settings Report" +linkTitle: "ClickHouse® Function/Engines/Settings Report" description: > - Report on ClickHouse functions, table functions, table engines, system and MergeTree settings, with availability information. + Report on ClickHouse® functions, table functions, table engines, system and MergeTree settings, with availability information. --- -Follow this link for a complete report on ClickHouse features with their availability: https://github.com/anselmodadams/ChMisc/blob/main/report/report.md. It is frequently updated (at least once a month). +Follow this link for a complete report on ClickHouse® features with their availability: https://github.com/anselmodadams/ChMisc/blob/main/report/report.md. It is frequently updated (at least once a month). diff --git a/layouts/partials/toc.html b/layouts/partials/toc.html index 4941b2bf19..63124da2b5 100644 --- a/layouts/partials/toc.html +++ b/layouts/partials/toc.html @@ -8,5 +8,5 @@ {{ partial "social-links.html" . }}
- Altinity®, Altinity.Cloud®, and Altinity Stable® are registered trademarks of Altinity, Inc. ClickHouse® is a registered trademark of ClickHouse, Inc.; Altinity is not affiliated with or associated with ClickHouse, Inc. + Altinity®, Altinity.Cloud®, and Altinity Stable® are registered trademarks of Altinity, Inc. ClickHouse® is a registered trademark of ClickHouse, Inc.; Altinity is not affiliated with or associated with ClickHouse, Inc. Kafka, Kubernetes, MySQL, and PostgreSQL are trademarks and property of their respective owners.
From a4a96398d6e97ac2935110b426947487e2e202d9 Mon Sep 17 00:00:00 2001 From: Doug Tidwell Date: Tue, 30 Jul 2024 22:09:50 -0400 Subject: [PATCH 14/17] Site cleanup, mostly minor changes --- content/en/altinity-kb-dictionaries/_index.md | 6 +- .../dictionaries-and-arrays.md | 2 +- .../mysql8-source-for-dictionaries.md | 2 +- .../partial-updates.md | 2 +- .../security-named-collections.md | 2 +- .../array-like-memory-usage.md | 2 +- .../assumenotnull-and-friends.md | 2 +- ...-to-encode-decode-quantiletdigest-state.md | 2 +- .../kurt_skew_statistics.md | 3 +- .../ClickHouse_python_drivers.md | 32 +++++------ content/en/altinity-kb-integrations/Spark.md | 8 +-- content/en/altinity-kb-integrations/_index.md | 2 +- .../altinity-cloud/_index.md | 2 +- .../altinity-cloud-access-management.md | 2 +- .../altinity-kb-google-s3-gcs.md | 2 +- .../altinity-kb-kafka/_index.md | 2 +- ...tinity-kb-adjusting-librdkafka-settings.md | 4 +- .../altinity-kb-exactly-once-semantics.md | 2 +- .../altinity-kb-kafka-main-parsing-loop.md | 2 +- .../altinity-kb-kafka-mv-consuming.md | 2 +- .../altinity-kb-kafka-parallel-consuming.md | 2 +- .../altinity-kb-rewind-fast-forward-replay.md | 2 +- ...round_message_broker_schedule_pool_size.md | 2 +- .../altinity-kb-kafka/error-handling.md | 2 +- .../kafka-schema-inference.md | 16 +++--- .../en/altinity-kb-integrations/bi-tools.md | 2 +- .../clickhouse-odbc.md | 46 +++++++-------- .../mysql-clickhouse.md | 18 +++--- content/en/altinity-kb-interfaces/_index.md | 2 +- .../altinity-kb-clickhouse-client.md | 4 +- content/en/altinity-kb-kubernetes/_index.md | 8 +-- ...e-issues-with-running-clickhouse-in-k8s.md | 10 ++-- .../en/altinity-kb-schema-design/_index.md | 2 +- ...inity-kb-dictionaries-vs-lowcardinality.md | 2 +- .../altinity-kb-jsoneachrow-tuples-and-mvs.md | 2 +- .../backfill_column.md | 2 +- ...trics-registered-from-the-single-source.md | 4 +- .../codecs/_index.md | 2 +- .../floats-vs-decimals.md | 2 +- .../how-much-is-too-much.md | 6 +- 
.../ingestion-aggregate-function.md | 2 +- .../insert_deduplication.md | 2 +- .../materialized-views/_index.md | 2 +- ...fill-populate-mv-in-a-controlled-manner.md | 2 +- .../idempotent_inserts_mv.md | 2 +- .../preaggregations.md | 2 +- .../row-level-deduplication.md | 12 ++-- .../two-columns-indexing.md | 2 +- .../_index.md | 2 +- .../alters.md | 6 +- .../altinity-kb-aggressive_merges.md | 4 +- ...altinity-kb-check-replication-ddl-queue.md | 2 +- .../altinity-kb-clickhouse-in-docker.md | 8 +-- ...y-kb-converting-mergetree-to-replicated.md | 2 +- .../altinity-kb-data-migration/_index.md | 10 ++-- .../add_remove_replica.md | 8 +-- .../altinity-kb-clickhouse-copier/_index.md | 8 +-- ...ity-kb-clickhouse-copier-kubernetes-job.md | 8 +-- .../distributed-table-cluster.md | 2 +- .../mssql-clickhouse.md | 6 +- .../remote-table-function.md | 2 +- .../altinity-kb-data-migration/rsync.md | 2 +- .../altinity-kb-ddlworker/_index.md | 4 +- ...ed-hosts-0-of-them-are-currently-active.md | 6 +- .../altinity-kb-memory-overcommit.md | 2 +- .../altinity-kb-monitoring.md | 22 ++++---- ...nity-kb-moving-table-to-another-device..md | 6 +- .../aws-s3-recipes.md | 7 ++- .../s3_and_mutations.md | 2 +- .../s3_cache_example.md | 2 +- .../altinity-kb-s3-object-storage/s3disk.md | 2 +- .../altinity-kb-server-config-files.md | 6 +- .../altinity-kb-settings-to-adjust.md | 2 +- .../altinity-kb-zookeeper/_index.md | 2 +- .../altinity-kb-proper-setup.md | 2 +- ...rom-complete-metadata-loss-in-zookeeper.md | 6 +- .../altinity-kb-zookeeper-backup.md | 6 +- .../clickhouse-keeper-service.md | 2 +- .../clickhouse-keeper.md | 4 +- .../altinity-kb-zookeeper/install_ubuntu.md | 4 +- ...vm-sizes-and-garbage-collector-settings.md | 4 +- ...eper-cluster-migration-k8s-node-storage.md | 2 +- ...atibility-greater-than-21.x-and-earlier.md | 4 +- .../asynchronous_metrics_descr.md | 2 +- .../aws-ec2-storage.md | 4 +- .../cgroups_k8s.md | 4 +- .../change-me.md | 6 +- .../clickhouse-backup-diff.md | 10 ++-- 
.../clickhouse-backup.md | 10 ++-- .../clickhouse-deployment-plan.md | 14 ++--- .../clickhouse-operator.md | 8 +-- .../clickhouse-versions.md | 14 ++--- .../_index.md | 2 +- .../cluster-configuration-faq.md | 8 +-- .../cluster-configuration-process.md | 4 +- .../hardening-clickhouse-security.md | 6 +- .../hardware-requirements.md | 4 +- .../network-configuration.md | 2 +- .../connection-problems.md | 24 ++++---- .../custom_settings.md | 2 +- .../disk_encryption.md | 2 +- .../filesystems.md | 20 +++---- .../high-cpu-usage.md | 2 +- .../load-balancers.md | 10 ++-- .../logging.md | 2 +- ...itoring-operator-exporter-compatibility.md | 7 +-- .../altinity-kb-setup-and-maintenance/rbac.md | 6 +- .../schema-migration-tools/_index.md | 6 +- .../schema-migration-tools/golang-migrate.md | 2 +- ...ource-pars-size-is-greater-than-maximum.md | 4 +- .../sysall.md | 2 +- .../uniq-uuid-doubled-clickhouse-upgrade.md | 8 +-- .../uniqExact-to-uniq-combined.md | 4 +- .../useful-setting-to-turn-on.md | 2 +- .../who-ate-my-cpu.md | 2 +- .../zookeeper-session-expired.md | 56 +++++++++---------- .../en/altinity-kb-useful-queries/_index.md | 2 +- ...b-number-of-active-parts-in-a-partition.md | 2 +- .../altinity-kb-useful-queries/debug-hang.md | 2 +- .../detached-parts.md | 4 +- .../altinity-kb-useful-queries/query_log.md | 4 +- content/en/upgrade/removing-empty-parts.md | 2 +- content/en/upgrade/removing-lost-parts.md | 2 +- content/en/upgrade/vulnerabilities.md | 2 +- content/en/using-this-knowledgebase/_index.md | 2 +- 125 files changed, 344 insertions(+), 349 deletions(-) diff --git a/content/en/altinity-kb-dictionaries/_index.md b/content/en/altinity-kb-dictionaries/_index.md index 15cc00ca4e..96182adc6e 100644 --- a/content/en/altinity-kb-dictionaries/_index.md +++ b/content/en/altinity-kb-dictionaries/_index.md @@ -6,11 +6,11 @@ keywords: - clickhouse arrays - postgresql dictionary description: > - All you need to know about creating and using ClickHouse dictionaries. 
+ All you need to know about creating and using ClickHouse® dictionaries. weight: 11 --- -For more information on ClickHouse Dictionaries, see +For more information on ClickHouse® Dictionaries, see the presentation [https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup34/clickhouse_integration.pdf](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup34/clickhouse_integration.pdf), slides 82-95, video https://youtu.be/728Yywcd5ys?t=10642 @@ -21,6 +21,6 @@ https://altinity.com/blog/2020/5/19/clickhouse-dictionaries-reloaded And some videos: https://www.youtube.com/watch?v=FsVrFbcyb84 -Also there 3rd party articles on the same subj. +Also there are 3rd party articles on the same subj. https://prog.world/how-to-create-and-use-dictionaries-in-clickhouse/ diff --git a/content/en/altinity-kb-dictionaries/dictionaries-and-arrays.md b/content/en/altinity-kb-dictionaries/dictionaries-and-arrays.md index fcbc0c5aac..8cfa4b0027 100644 --- a/content/en/altinity-kb-dictionaries/dictionaries-and-arrays.md +++ b/content/en/altinity-kb-dictionaries/dictionaries-and-arrays.md @@ -4,7 +4,7 @@ linkTitle: "Dictionaries & arrays" description: > Dictionaries & arrays --- -## Dictionary with ClickHouse table as a source +## Dictionary with ClickHouse® table as a source ### Test data diff --git a/content/en/altinity-kb-dictionaries/mysql8-source-for-dictionaries.md b/content/en/altinity-kb-dictionaries/mysql8-source-for-dictionaries.md index 3554bcda00..650519dff6 100644 --- a/content/en/altinity-kb-dictionaries/mysql8-source-for-dictionaries.md +++ b/content/en/altinity-kb-dictionaries/mysql8-source-for-dictionaries.md @@ -6,7 +6,7 @@ description: > --- #### Authorization -MySQL8 used default authorization plugin `caching_sha2_password`. Unfortunately, `libmysql` which currently used (21.4-) in clickhouse is not. +MySQL8 used default authorization plugin `caching_sha2_password`. 
Unfortunately, `libmysql` which currently used (21.4-) in ClickHouse® is not. You can fix it during create custom user with `mysql_native_password` authentication plugin. diff --git a/content/en/altinity-kb-dictionaries/partial-updates.md b/content/en/altinity-kb-dictionaries/partial-updates.md index c832b20b1f..8efd312c3a 100644 --- a/content/en/altinity-kb-dictionaries/partial-updates.md +++ b/content/en/altinity-kb-dictionaries/partial-updates.md @@ -4,7 +4,7 @@ linkTitle: "Partial updates" description: > Partial updates --- -ClickHouse is able to fetch from a source only updated rows. You need to define `update_field` section. +ClickHouse® is able to fetch from a source only updated rows. You need to define `update_field` section. As an example, We have a table in an external source MySQL, PG, HTTP, ... defined with the following code sample: diff --git a/content/en/altinity-kb-dictionaries/security-named-collections.md b/content/en/altinity-kb-dictionaries/security-named-collections.md index 907b6d113c..5c34dd11d8 100644 --- a/content/en/altinity-kb-dictionaries/security-named-collections.md +++ b/content/en/altinity-kb-dictionaries/security-named-collections.md @@ -6,7 +6,7 @@ description: > --- -## Dictionary with ClickHouse table as a source with named collections +## Dictionary with ClickHouse® table as a source with named collections ### Data for connecting to external sources can be stored in named collections diff --git a/content/en/altinity-kb-functions/array-like-memory-usage.md b/content/en/altinity-kb-functions/array-like-memory-usage.md index a88ceec5a1..86cebfafe2 100644 --- a/content/en/altinity-kb-functions/array-like-memory-usage.md +++ b/content/en/altinity-kb-functions/array-like-memory-usage.md @@ -7,7 +7,7 @@ description: > ## arrayMap-like functions memory usage calculation. 
-In order to calculate arrayMap or similar array* functions ClickHouse temporarily does arrayJoin-like operation, which in certain conditions can lead to huge memory usage for big arrays. +In order to calculate arrayMap or similar array* functions ClickHouse® temporarily does arrayJoin-like operation, which in certain conditions can lead to huge memory usage for big arrays. So for example, you have 2 columns: diff --git a/content/en/altinity-kb-functions/assumenotnull-and-friends.md b/content/en/altinity-kb-functions/assumenotnull-and-friends.md index 7ba6ca1ee5..760d2afbbf 100644 --- a/content/en/altinity-kb-functions/assumenotnull-and-friends.md +++ b/content/en/altinity-kb-functions/assumenotnull-and-friends.md @@ -89,7 +89,7 @@ Code: 36, e.displayText() = DB::Exception: Unexpected value 0 in enum, Stack tra ``` {{% alert title="Info" color="info" %}} -Null values in ClickHouse are stored in a separate dictionary: is this value Null. And for faster dispatch of functions there is no check on Null value while function execution, so functions like plus can modify internal column value (which has default value). In normal conditions it’s not a problem because on read attempt, ClickHouse first would check the Null dictionary and return value from column itself for non-Nulls only. And `assumeNotNull` function just ignores this Null dictionary. So it would return only column values, and in certain cases it’s possible to have unexpected results. +Null values in ClickHouse® are stored in a separate dictionary: is this value Null. And for faster dispatch of functions there is no check on Null value while function execution, so functions like plus can modify internal column value (which has default value). In normal conditions it’s not a problem because on read attempt, ClickHouse first would check the Null dictionary and return value from column itself for non-Nulls only. And `assumeNotNull` function just ignores this Null dictionary. 
So it would return only column values, and in certain cases it’s possible to have unexpected results. {{% /alert %}} If it's possible to have Null values, it's better to use `ifNull` function instead. diff --git a/content/en/altinity-kb-functions/how-to-encode-decode-quantiletdigest-state.md b/content/en/altinity-kb-functions/how-to-encode-decode-quantiletdigest-state.md index 75edac649a..01c1f0f4d4 100644 --- a/content/en/altinity-kb-functions/how-to-encode-decode-quantiletdigest-state.md +++ b/content/en/altinity-kb-functions/how-to-encode-decode-quantiletdigest-state.md @@ -3,7 +3,7 @@ title: "How to encode/decode quantileTDigest states from/to list of centroids" linkTitle: "Encoding and Decoding of quantileTDigest states" weight: 100 description: >- - A way to export or import quantileTDigest states from/into ClickHouse. + A way to export or import quantileTDigest states from/into ClickHouse® --- ## quantileTDigestState diff --git a/content/en/altinity-kb-functions/kurt_skew_statistics.md b/content/en/altinity-kb-functions/kurt_skew_statistics.md index 50e268eeb4..84cd3b4950 100644 --- a/content/en/altinity-kb-functions/kurt_skew_statistics.md +++ b/content/en/altinity-kb-functions/kurt_skew_statistics.md @@ -1,5 +1,6 @@ --- -title: "kurt & skew statistical functions in ClickHouse" +title: "kurt & skew statistical functions in ClickHouse® +" linkTitle: "kurt & skew" weight: 100 description: >- diff --git a/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md b/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md index 433d35880d..fcc49958dc 100644 --- a/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md +++ b/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md @@ -1,16 +1,14 @@ --- -title: "ClickHouse python drivers" -linkTitle: "ClickHouse python drivers" +title: "ClickHouse® python drivers" +linkTitle: "ClickHouse® python drivers" weight: 100 description: >- - Python main drivers/clients for ClickHouse + 
Python main drivers/clients for ClickHouse® --- -## ClickHouse python drivers +There are two main python drivers that can be used with ClickHouse®. They all have their different set of features and use cases: -There are two main python drivers that can be used with ClickHouse. They all have their different set of features and use cases: - -### ClickHouse driver AKA [clickhouse-driver](https://clickhouse-driver.readthedocs.io/en/latest/) +## ClickHouse driver AKA [clickhouse-driver](https://clickhouse-driver.readthedocs.io/en/latest/) The **`clickhouse-driver`** is a Python library used for interacting with ClickHouse. Here's a summary of its features: @@ -30,15 +28,15 @@ The **`clickhouse-driver`** is a Python library used for interacting with ClickH - Good Pandas/Numpy support: [https://clickhouse-driver.readthedocs.io/en/latest/features.html#numpy-pandas-support](https://clickhouse-driver.readthedocs.io/en/latest/features.html#numpy-pandas-support) - Good SQLALchemy support: [https://pypi.org/project/clickhouse-sqlalchemy/](https://pypi.org/project/clickhouse-sqlalchemy/) -This was the first python driver for clickhouse. It has a mature codebase. By default clickhouse drivers uses [synchronous code](https://clickhouse-driver.readthedocs.io/en/latest/quickstart.html#async-and-multithreading). There is a wrapper to convert code to asynchronous, [https://github.com/long2ice/asynch](https://github.com/long2ice/asynch) +This was the first python driver for ClickHouse. It has a mature codebase. By default ClickHouse drivers uses [synchronous code](https://clickhouse-driver.readthedocs.io/en/latest/quickstart.html#async-and-multithreading). 
There is a wrapper to convert code to asynchronous, [https://github.com/long2ice/asynch](https://github.com/long2ice/asynch) Here you can get a basic working example from Altinity repo for ingestion/selection using clickhouse-driver: [https://github.com/lesandie/clickhouse-tests/blob/main/scripts/test_ch_driver.py](https://github.com/lesandie/clickhouse-tests/blob/main/scripts/test_ch_driver.py) -### ClickHouse-connect AKA [clickhouse-connect](https://clickhouse.com/docs/en/integrations/python) +## ClickHouse-connect AKA [clickhouse-connect](https://clickhouse.com/docs/en/integrations/python) -The ClickHouse Connect Python driver is the ClickHouse, Inc supported-official Python library. Here's a summary of its key features: +The ClickHouse Connect Python driver is the ClickHouse, Inc. supported-official Python library. Here's a summary of its key features: 1. **Connectivity**: allows Python applications to connect to ClickHouse servers over HTTP Interface (8123/8443 ports). 2. **Compatibility**: The driver is compatible with Python 3.x versions, ensuring that it can be used with modern Python applications without compatibility issues. @@ -68,19 +66,19 @@ Also some Altinity examples from repo: You can clone the repo and use the helper files like `DDL.sql` to setup some tests. 
-Clickhouse-connect can use a connection pooler (based on urllib3) [https://clickhouse.com/docs/en/integrations/python#customizing-the-http-connection-pool](https://clickhouse.com/docs/en/integrations/python#customizing-the-http-connection-pool) +`clickhouse-connect` can use a connection pooler (based on urllib3) [https://clickhouse.com/docs/en/integrations/python#customizing-the-http-connection-pool](https://clickhouse.com/docs/en/integrations/python#customizing-the-http-connection-pool) ### Most common use cases: #### Managing ClickHouse `session_id`: -- clickhouse-driver +- `clickhouse-driver` - Because it is using the Native Interface `session_id` is managed internally by clickhouse, so it is very rare (unless using asyncio) to get: `Code: 373. DB::Exception: Session is locked by a concurrent client. (SESSION_IS_LOCKED)` . -- clickhouse-connect: How to use clickhouse-connect in a pythonic way and avoid getting `SESSION_IS_LOCKED` exceptions: +- `clickhouse-connect`: How to use `clickhouse-connect` in a pythonic way and avoid getting `SESSION_IS_LOCKED` exceptions: - [https://clickhouse.com/docs/en/integrations/python#managing-clickhouse-session-ids](https://clickhouse.com/docs/en/integrations/python#managing-clickhouse-session-ids) - If you want to specify a session_id per query you should be able to use the setting dictionary to pass a `session_id` for each query (note that ClickHouse will automatically generate a `session_id` if none is provided). 
@@ -90,19 +88,19 @@ Clickhouse-connect can use a connection pooler (based on urllib3) [https://click ``` -Also in clickhouse documentation some explanation how to set `session_id` with another approach: [https://clickhouse.com/docs/en/integrations/python#managing-clickhouse-session-ids](https://clickhouse.com/docs/en/integrations/python#managing-clickhouse-session-ids) +Also in ClickHouse documentation some explanation how to set `session_id` with another approach: [https://clickhouse.com/docs/en/integrations/python#managing-clickhouse-session-ids](https://clickhouse.com/docs/en/integrations/python#managing-clickhouse-session-ids) [ClickHouse Connect Driver API | ClickHouse Docs](https://clickhouse.com/docs/en/integrations/language-clients/python/driver-api#common-method-arguments) [Best practices with flask · Issue #73 · ClickHouse/clickhouse-connect](https://github.com/ClickHouse/clickhouse-connect/issues/73#issuecomment-1325280242) -#### clickhouse-connect & clickhouse-driver with Asyncio +#### `clickhouse-connect` & `clickhouse-driver` with Asyncio `clickhouse-connect` code is synchronous and running synchronous functions in an async application is a workaround and might not be as efficient as using a library designed for asynchronous operations from the ground up. Problem is there are few libs/modules in Python. So you can use `concurrent.futures` and `ThreadpoolExecutor` or `ProcessPoolExecutor`. Python GIL has a mutex over Threads but not to Processes so if you need performance at the cost of using processes instead of threads (not much different for medium workloads) you can use `ProcesspoolExecutor` instead. 
Some info about this from the tinybird guys [https://www.tinybird.co/blog-posts/killing-the-processpoolexecutor](https://www.tinybird.co/blog-posts/killing-the-processpoolexecutor)
 
-For clickhouse-connect
+For `clickhouse-connect`
 
 ```python
 import asyncio
@@ -133,7 +131,7 @@ if __name__ == '__main__':
     asyncio.run(main())
 ```
 
-Clickhouse-driver code is also synchronous and suffers the same problem as clickhouse-connect
+`clickhouse-driver` code is also synchronous and suffers the same problem as `clickhouse-connect`
 
 [https://clickhouse-driver.readthedocs.io/en/latest/quickstart.html#async-and-multithreading](https://clickhouse-driver.readthedocs.io/en/latest/quickstart.html#async-and-multithreading)
 
diff --git a/content/en/altinity-kb-integrations/Spark.md b/content/en/altinity-kb-integrations/Spark.md
index 2f270b92ea..4de77b2d20 100644
--- a/content/en/altinity-kb-integrations/Spark.md
+++ b/content/en/altinity-kb-integrations/Spark.md
@@ -1,12 +1,12 @@
 ---
-title: "ClickHouse + Spark"
+title: "ClickHouse® + Spark"
 linkTitle: "Spark"
 weight: 100
 description: >-
   Spark
 ---
 
-## ClickHouse + Spark
+## ClickHouse® + Spark
 
 ### jdbc
 
@@ -57,13 +57,13 @@ Arrays, Higher-order functions, machine learning, integration with lot of differ
 * Spark + ClickHouse: not a fight, but a symbiosis (Russian) https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/spark_and_clickhouse.pdf (russian)
 * Using a bunch of ClickHouse and Spark in MFI Soft (Russian) https://www.youtube.com/watch?v=ID8eTnmag0s (russian)
 * Spark read and write ClickHouse (Chinese: Spark读写ClickHouse) https://yerias.github.io/2020/12/08/clickhouse/9/#Jdbc%E6%93%8D%E4%BD%9Cclickhouse
-* Spark JDBC write clickhouse operation summary (Chinese: Spark JDBC 写 clickhouse 操作总结) https://www.jianshu.com/p/43f78c8a025b?hmsr=toutiao.io&utm_campaign=toutiao.io&utm_medium=toutiao.io&utm_source=toutiao.io
+* Spark JDBC write ClickHouse operation summary (Chinese: Spark JDBC 写 ClickHouse 操作总结) 
https://www.jianshu.com/p/43f78c8a025b?hmsr=toutiao.io&utm_campaign=toutiao.io&utm_medium=toutiao.io&utm_source=toutiao.io * Spark-sql is based on ClickHouse's DataSourceV2 data source extension (Chinese: spark-sql基于ClickHouse的DataSourceV2数据源扩展) https://www.cnblogs.com/mengyao/p/4689866.html * Alibaba integration instructions (English) https://www.alibabacloud.com/help/doc-detail/191192.htm * Tencent integration instructions (English) https://intl.cloud.tencent.com/document/product/1026/35884 * Yandex DataProc demo: loading files from S3 to ClickHouse with Spark (Russian) https://www.youtube.com/watch?v=N3bZW0_rRzI -* ClickHouse official documentation_Spark JDBC writes some pits of ClickHouse (Chinese: clickhouse官方文档_Spark JDBC写ClickHouse的一些坑) https://blog.csdn.net/weixin_39615984/article/details/111206050 +* ClickHouse official documentation_Spark JDBC writes some pits of ClickHouse (Chinese: ClickHouse官方文档_Spark JDBC写ClickHouse的一些坑) https://blog.csdn.net/weixin_39615984/article/details/111206050 * ClickHouse data import: Flink, Spark, Kafka, MySQL, Hive (Chinese: 篇五|ClickHouse数据导入 Flink、Spark、Kafka、MySQL、Hive) https://zhuanlan.zhihu.com/p/299094269 * Baifendian Big Data Technical Team: Practice of ClickHouse data synchronization solutionbased on multiple Spark tasks (Chinese: 百分点大数据技术团队:基于多 Spark 任务的 ClickHouse 数据同步方案实践) https://www.6aiq.com/article/1635461873075 * SPARK-CLICKHOUSE-ES REAL-TIME PROJECT EIGHTH DAY-PRECISE ONE-TIME CONSUMPTION SAVE OFFSET. 
(Chinese: SPARK-CLICKHOUSE-ES实时项目第八天-精确一次性消费保存偏移量) https://www.freesion.com/article/71421322524/ diff --git a/content/en/altinity-kb-integrations/_index.md b/content/en/altinity-kb-integrations/_index.md index dc1cd483d1..6951848f56 100644 --- a/content/en/altinity-kb-integrations/_index.md +++ b/content/en/altinity-kb-integrations/_index.md @@ -6,6 +6,6 @@ keywords: - clickhouse bi - clickhouse kafka description: > - Learn how you can integrate cloud services, BI tools, kafka, MySQL, Spark, MindsDB, and more with ClickHouse. + Learn how you can integrate cloud services, BI tools, kafka, MySQL, Spark, MindsDB, and more with ClickHouse® weight: 4 --- diff --git a/content/en/altinity-kb-integrations/altinity-cloud/_index.md b/content/en/altinity-kb-integrations/altinity-cloud/_index.md index fc03237ffc..b857fc0794 100644 --- a/content/en/altinity-kb-integrations/altinity-cloud/_index.md +++ b/content/en/altinity-kb-integrations/altinity-cloud/_index.md @@ -2,6 +2,6 @@ title: "Cloud Services" linkTitle: "Cloud Services" description: > - Tips and tricks for using ClickHouse with different cloud services. + Tips and tricks for using ClickHouse® with different cloud services. weight: 4 --- diff --git a/content/en/altinity-kb-integrations/altinity-cloud/altinity-cloud-access-management.md b/content/en/altinity-kb-integrations/altinity-cloud/altinity-cloud-access-management.md index 129586e1bd..1ec7a1583a 100644 --- a/content/en/altinity-kb-integrations/altinity-cloud/altinity-cloud-access-management.md +++ b/content/en/altinity-kb-integrations/altinity-cloud/altinity-cloud-access-management.md @@ -5,7 +5,7 @@ description: > Enabling access_management for Altinity.Cloud databases. weight: 5 --- -Organizations that want to enable administrative users in their Altinity.Cloud ClickHouse servers can do so by enabling `access_management` manually. This allows for administrative users to be created on the specific ClickHouse Cluster. 
+Organizations that want to enable administrative users in their Altinity.Cloud ClickHouse® servers can do so by enabling `access_management` manually. This allows for administrative users to be created on the specific ClickHouse Cluster. {{% alert title="WARNING" color="warning" %}} Modifying the ClickHouse cluster settings manually can lead to the cluster not loading or other issues. Change settings only with full consultation with an Altinity.Cloud support team member, and be ready to remove settings if they cause any disruption of service. diff --git a/content/en/altinity-kb-integrations/altinity-kb-google-s3-gcs.md b/content/en/altinity-kb-integrations/altinity-kb-google-s3-gcs.md index 826244b994..10accb14d9 100644 --- a/content/en/altinity-kb-integrations/altinity-kb-google-s3-gcs.md +++ b/content/en/altinity-kb-integrations/altinity-kb-google-s3-gcs.md @@ -11,5 +11,5 @@ Essentially you can follow the steps from the [Migrating from Amazon S3 to Cloud 1. Set up a GCS bucket. 2. This bucket must be set as part of the default project for the account. This configuration can be found in settings -> interoperability. 3. Generate a HMAC key for the account, can be done in settings -> interoperability, in the section for user account access keys. -4. In ClickHouse, replace the S3 bucket endpoint with the GCS bucket endpoint This must be done with the path-style GCS endpoint: `https://storage.googleapis.com/BUCKET_NAME/OBJECT_NAME`. +4. In ClickHouse®, replace the S3 bucket endpoint with the GCS bucket endpoint This must be done with the path-style GCS endpoint: `https://storage.googleapis.com/BUCKET_NAME/OBJECT_NAME`. 5. Replace the aws access key id and aws secret access key with the corresponding parts of the HMAC key. 
diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/_index.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/_index.md index 4a6de0c312..f4b242f478 100644 --- a/content/en/altinity-kb-integrations/altinity-kb-kafka/_index.md +++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/_index.md @@ -8,7 +8,7 @@ description: > git log -- contrib/librdkafka | git name-rev --stdin ``` -| **ClickHouse version** | **librdkafka version** | +| **ClickHouse® version** | **librdkafka version** | | :--- | :--- | | 21.10+ ([\#27883](https://github.com/ClickHouse/ClickHouse/pull/27883)) | [1.6.1](https://github.com/edenhill/librdkafka/blob/v1.6.1/CHANGELOG.md) + snappy fixes + boring ssl + illumos_build fixes + edenhill#3279 fix| | 21.6+ ([\#23874](https://github.com/ClickHouse/ClickHouse/pull/23874)) | [1.6.1](https://github.com/edenhill/librdkafka/blob/v1.6.1/CHANGELOG.md) + snappy fixes + boring ssl + illumos_build fixes| diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-adjusting-librdkafka-settings.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-adjusting-librdkafka-settings.md index c7fe4955b1..32c171ddc0 100644 --- a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-adjusting-librdkafka-settings.md +++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-adjusting-librdkafka-settings.md @@ -71,7 +71,7 @@ Sometimes the consumer group needs to be explicitly allowed in the broker UI con To connect to some Kafka cloud services you may need to use certificates. 
-If needed they can be converted to pem format and inlined into ClickHouse config.xml +If needed they can be converted to pem format and inlined into ClickHouse® config.xml Example: ```xml @@ -137,7 +137,7 @@ See [https://github.com/ClickHouse/ClickHouse/issues/12609](https://github.com/C ## How to test connection settings -Use kafkacat utility - it internally uses same library to access Kafla as clickhouse itself and allows easily to test different settings. +Use kafkacat utility - it internally uses same library to access Kafla as ClickHouse itself and allows easily to test different settings. ```bash kafkacat -b my_broker:9092 -C -o -10 -t my_topic \ diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-exactly-once-semantics.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-exactly-once-semantics.md index 22211a9374..fa54523134 100644 --- a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-exactly-once-semantics.md +++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-exactly-once-semantics.md @@ -4,7 +4,7 @@ linkTitle: "Exactly once semantics" description: > Exactly once semantics --- -EOS consumer (isolation.level=read_committed) is enabled by default since librdkafka 1.2.0, so for ClickHouse - since 20.2 +EOS consumer (isolation.level=read_committed) is enabled by default since librdkafka 1.2.0, so for ClickHouse® - since 20.2 See: diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-main-parsing-loop.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-main-parsing-loop.md index 6839e4a047..faf9831575 100644 --- a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-main-parsing-loop.md +++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-main-parsing-loop.md @@ -4,7 +4,7 @@ linkTitle: "Kafka main parsing loop" description: > Kafka main parsing loop --- -One of the threads from 
scheduled_pool (pre 20.9) / `background_message_broker_schedule_pool` (after 20.9) do that in infinite loop: +One of the threads from scheduled_pool (pre ClickHouse® 20.9) / `background_message_broker_schedule_pool` (after 20.9) do that in infinite loop: 1. Batch poll (time limit: `kafka_poll_timeout_ms` 500ms, messages limit: `kafka_poll_max_batch_size` 65536) 2. Parse messages. diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-mv-consuming.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-mv-consuming.md index 117349af53..56c1a09c1f 100644 --- a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-mv-consuming.md +++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-mv-consuming.md @@ -25,7 +25,7 @@ This is how we can see it in the logs: 2022.11.09 17:49:34.282077 [ 2385 ] {} StorageKafka (kafka_table): Started streaming to 2 attached views ``` -* How ClickHouse calculates the number of threads depending on the `thread_per_consumer` setting: +* How ClickHouse® calculates the number of threads depending on the `thread_per_consumer` setting: ```c++ auto stream_count = thread_per_consumer ? 1 : num_created_consumers; diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-parallel-consuming.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-parallel-consuming.md index 0aec709a47..4f7b62d5d3 100644 --- a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-parallel-consuming.md +++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-kafka-parallel-consuming.md @@ -4,7 +4,7 @@ linkTitle: "Kafka parallel consuming" description: > Kafka parallel consuming --- -For very large topics when you need more parallelism (especially on the insert side) you may use several tables with the same pipeline (pre 20.9) or enable `kafka_thread_per_consumer` (after 20.9). 
+For very large topics when you need more parallelism (especially on the insert side) you may use several tables with the same pipeline (pre ClickHouse® 20.9) or enable `kafka_thread_per_consumer` (after 20.9). ```ini kafka_num_consumers = N, diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-rewind-fast-forward-replay.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-rewind-fast-forward-replay.md index 5564d5c883..2bee29c05b 100644 --- a/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-rewind-fast-forward-replay.md +++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/altinity-kb-rewind-fast-forward-replay.md @@ -4,7 +4,7 @@ linkTitle: "Rewind / fast-forward / replay" description: > Rewind / fast-forward / replay --- -* Step 1: Detach Kafka tables in ClickHouse +* Step 1: Detach Kafka tables in ClickHouse® ``` DETACH TABLE db.kafka_table_name ON CLUSTER '{cluster}'; ``` diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/background_message_broker_schedule_pool_size.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/background_message_broker_schedule_pool_size.md index 0da9e87c34..3ff9652924 100644 --- a/content/en/altinity-kb-integrations/altinity-kb-kafka/background_message_broker_schedule_pool_size.md +++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/background_message_broker_schedule_pool_size.md @@ -43,7 +43,7 @@ If the number of free threads is zero or very close to zero, you might experienc ### Adjusting the Thread Pool Size -To fix the problem, increase the `background_message_broker_schedule_pool_size` setting in your `config.xml`. For older ClickHouse versions, you may need to adjust this setting in both the default profile in `users.xml` and `config.xml`. +To fix the problem, increase the `background_message_broker_schedule_pool_size` setting in your `config.xml`. 
For older ClickHouse® versions, you may need to adjust this setting in both the default profile in `users.xml` and `config.xml`. ### Estimating the Required Pool Size diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/error-handling.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/error-handling.md index b5320bb520..e2ba79a27e 100644 --- a/content/en/altinity-kb-integrations/altinity-kb-kafka/error-handling.md +++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/error-handling.md @@ -14,7 +14,7 @@ It's also possible to skip up to N malformed messages for each block, with used ## After 21.6 -It's possible to stream messages which could not be parsed, this behavior could be enabled via setting: `kafka_handle_error_mode='stream'` and clickhouse wil write error and message from Kafka itself to two new virtual columns: `_error, _raw_message`. +It's possible to stream messages which could not be parsed, this behavior could be enabled via setting: `kafka_handle_error_mode='stream'` and ClickHouse® wil write error and message from Kafka itself to two new virtual columns: `_error, _raw_message`. So you can create another Materialized View which would collect to a separate table all errors happening while parsing with all important information like offset and content of message. 
diff --git a/content/en/altinity-kb-integrations/altinity-kb-kafka/kafka-schema-inference.md b/content/en/altinity-kb-integrations/altinity-kb-kafka/kafka-schema-inference.md index fefb488a52..a47e65a6c9 100644 --- a/content/en/altinity-kb-integrations/altinity-kb-kafka/kafka-schema-inference.md +++ b/content/en/altinity-kb-integrations/altinity-kb-kafka/kafka-schema-inference.md @@ -1,14 +1,12 @@ --- -title: "Inferring Schema from AvroConfluent Messages in Kafka for ClickHouse" +title: "Inferring Schema from AvroConfluent Messages in Kafka for ClickHouse®" linkTitle: "Schema Inference for Kafka" weight: 100 description: >- - Learn how to define Kafka table structures in ClickHouse by using Avro's schema registry & sample message. + Learn how to define Kafka table structures in ClickHouse® by using Avro's schema registry & sample message. --- -## Inferring Schema from AvroConfluent Messages in Kafka for ClickHouse - -To consume messages from Kafka within ClickHouse, you need to define the `ENGINE=Kafka` table structure with all the column names and types. +To consume messages from Kafka within ClickHouse®, you need to define the `ENGINE=Kafka` table structure with all the column names and types. This task can be particularly challenging when dealing with complex Avro messages, as manually determining the exact schema for ClickHouse is both tricky and time-consuming. This complexity is particularly frustrating in the case of Avro formats, where the column names and their types are already clearly defined in the schema registry. 
@@ -17,7 +15,7 @@ Although ClickHouse supports schema inference for files, it does not natively su Here’s a workaround to infer the schema using AvroConfluent messages: -### Step 1: Capture and Store a Raw Kafka Message +## Step 1: Capture and Store a Raw Kafka Message First, create a table in ClickHouse to consume a raw message from Kafka and store it as a file: @@ -35,7 +33,7 @@ SETTINGS max_block_size=1, stream_like_engine_allow_direct_select=1; DROP TABLE test_kafka; ``` -### Step 2: Infer Schema Using the Stored File +## Step 2: Infer Schema Using the Stored File Using the stored raw message, let ClickHouse infer the schema based on the AvroConfluent format and a specified schema registry URL: ```sql @@ -59,7 +57,7 @@ statement: CREATE TEMPORARY TABLE test ENGINE = Memory ``` -### Step 3: Create the Kafka Table with the Inferred Schema +## Step 3: Create the Kafka Table with the Inferred Schema Now, use the inferred schema to create the Kafka table: ```sql @@ -79,7 +77,7 @@ SETTINGS kafka_broker_list = 'localhost:29092', This approach reduces manual schema definition efforts and enhances data integration workflows by utilizing the schema inference capabilities of ClickHouse for AvroConfluent messages. -### Appendix +## Appendix **Avro** is a binary serialization format used within Apache Kafka for efficiently serializing data with a compact binary format. It relies on schemas, which define the structure of the serialized data, to ensure robust data compatibility and type safety. 
diff --git a/content/en/altinity-kb-integrations/bi-tools.md b/content/en/altinity-kb-integrations/bi-tools.md index e33b866858..ee1d2f09ea 100644 --- a/content/en/altinity-kb-integrations/bi-tools.md +++ b/content/en/altinity-kb-integrations/bi-tools.md @@ -7,7 +7,7 @@ description: > * Superset: [https://superset.apache.org/docs/databases/clickhouse](https://superset.apache.org/docs/databases/clickhouse) * Metabase: [https://github.com/enqueue/metabase-clickhouse-driver](https://github.com/enqueue/metabase-clickhouse-driver) * Querybook: [https://www.querybook.org/docs/setup_guide/connect_to_query_engines/\#all-query-engines](https://www.querybook.org/docs/setup_guide/connect_to_query_engines/#all-query-engines) -* Tableau: [Altinity Tableau Connector for ClickHouse](https://github.com/Altinity/tableau-connector-for-clickhouse) support both JDBC & ODBC drivers +* Tableau: [Altinity Tableau Connector for ClickHouse®](https://github.com/Altinity/tableau-connector-for-clickhouse) support both JDBC & ODBC drivers * Looker: [https://docs.looker.com/setup-and-management/database-config/clickhouse](https://docs.looker.com/setup-and-management/database-config/clickhouse) * Apache Zeppelin * SeekTable diff --git a/content/en/altinity-kb-integrations/clickhouse-odbc.md b/content/en/altinity-kb-integrations/clickhouse-odbc.md index a68e4b40fb..d10fd50adc 100644 --- a/content/en/altinity-kb-integrations/clickhouse-odbc.md +++ b/content/en/altinity-kb-integrations/clickhouse-odbc.md @@ -1,12 +1,12 @@ --- -title: "ODBC Driver for ClickHouse" -linkTitle: "ODBC Driver for ClickHouse" +title: "ODBC Driver for ClickHouse®" +linkTitle: "ODBC Driver for ClickHouse®" weight: 100 description: >- - ODBC Driver for ClickHouse + ODBC Driver for ClickHouse® --- -[ODBC](https://docs.microsoft.com/en-us/sql/odbc/reference/odbc-overview) interface for ClickHouse RDBMS. +[ODBC](https://docs.microsoft.com/en-us/sql/odbc/reference/odbc-overview) interface for ClickHouse® RDBMS. 
Licensed under the [Apache 2.0](https://github.com/ClickHouse/clickhouse-odbc?tab=Apache-2.0-1-ov-file#readme). @@ -28,7 +28,7 @@ applications (cygwin / msys64 based) may require driver linked agains unixodbc. ```bash brew install https://raw.githubusercontent.com/proller/homebrew-core/chodbc/Formula/clickhouse-odbc.rb ``` -3. Add clickhouse DSN configuration into ~/.odbc.ini file. ([sample]()) +3. Add ClickHouse DSN configuration into ~/.odbc.ini file. ([sample]()) Note: that install driver linked against iodbc (which is default for Mac), some homebrew applications (like python) may require unixodbc driver to work properly. In that case see Build section below. @@ -36,7 +36,7 @@ Note: that install driver linked against iodbc (which is default for Mac), some ### Linux 1. DEB/RPM packaging is not provided yet, please build & install the driver from sources. -2. Add clickhouse DSN configuration into ~/.odbc.ini file. ([sample]()) +2. Add ClickHouse DSN configuration into ~/.odbc.ini file. ([sample]()) ## Configuration @@ -47,23 +47,23 @@ On Windows you can create/edit DSN using GUI tool through Control Panel. The list of DSN parameters recognized by the driver is as follows: -| Parameter | Default value | Description | -| :-----------------: | :----------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `Url` | empty | URL that points to a running ClickHouse instance, may include username, password, port, database, etc. 
| -| `Proto` | deduced from `Url`, or from `Port` and `SSLMode`: `https` if `443` or `8443` or `SSLMode` is not empty, `http` otherwise | Protocol, one of: `http`, `https` | -| `Server` or `Host` | deduced from `Url` | IP or hostname of a server with a running ClickHouse instance on it | -| `Port` | deduced from `Url`, or from `Proto`: `8443` if `https`, `8123` otherwise | Port on which the ClickHouse instance is listening | -| `Path` | `/query` | Path portion of the URL | -| `UID` or `Username` | `default` | User name | -| `PWD` or `Password` | empty | Password | -| `Database` | `default` | Database name to connect to | -| `Timeout` | `30` | Connection timeout | -| `SSLMode` | empty | Certificate verification method (used by TLS/SSL connections, ignored in Windows), one of: `allow`, `prefer`, `require`, use `allow` to enable [`SSL_VERIFY_PEER`](https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set_verify.html) TLS/SSL certificate verification mode, [`SSL_VERIFY_PEER \| SSL_VERIFY_FAIL_IF_NO_PEER_CERT`](https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set_verify.html) is used otherwise | -| `PrivateKeyFile` | empty | Path to private key file (used by TLS/SSL connections), can be empty if no private key file is used | -| `CertificateFile` | empty | Path to certificate file (used by TLS/SSL connections, ignored in Windows), if the private key and the certificate are stored in the same file, this can be empty if `PrivateKeyFile` is specified | -| `CALocation` | empty | Path to the file or directory containing the CA/root certificates (used by TLS/SSL connections, ignored in Windows) | -| `DriverLog` | `on` if `CMAKE_BUILD_TYPE` is `Debug`, `off` otherwise | Enable or disable the extended driver logging | -| `DriverLogFile` | `\temp\clickhouse-odbc-driver.log` on Windows, `/tmp/clickhouse-odbc-driver.log` otherwise | Path to the extended driver log file (used when `DriverLog` is `on`) | +| Parameter | Default value | Description | +| :-----------------: | 
:----------------------------------------------------------------------------------------------------------------------: |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `Url` | empty | URL that points to a running ClickHouse instance, may include username, password, port, database, etc. | +| `Proto` | deduced from `Url`, or from `Port` and `SSLMode`: `https` if `443` or `8443` or `SSLMode` is not empty, `http` otherwise | Protocol, one of: `http`, `https` | +| `Server` or `Host` | deduced from `Url` | IP or hostname of a server with a running ClickHouse instance on it | +| `Port` | deduced from `Url`, or from `Proto`: `8443` if `https`, `8123` otherwise | Port on which the ClickHouse instance is listening | +| `Path` | `/query` | Path portion of the URL | +| `UID` or `Username` | `default` | User name | +| `PWD` or `Password` | empty | Password | +| `Database` | `default` | Database name to connect to | +| `Timeout` | `30` | Connection timeout | +| `SSLMode` | empty | Certificate verification method (used by TLS/SSL connections, ignored in Windows), one of: `allow`, `prefer`, `require`, use `allow` to enable [SSL_VERIFY_PEER](https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set_verify.html) TLS/SSL certificate verification mode, [SSL_VERIFY_PEER \| SSL_VERIFY_FAIL_IF_NO_PEER_CERT](https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set_verify.html) is used otherwise | +| `PrivateKeyFile` | empty | Path to private key file (used by TLS/SSL connections), can be empty if no private key file is used | +| `CertificateFile` | empty | Path to certificate file (used by TLS/SSL connections, ignored in 
Windows), if the private key and the certificate are stored in the same file, this can be empty if `PrivateKeyFile` is specified | +| `CALocation` | empty | Path to the file or directory containing the CA/root certificates (used by TLS/SSL connections, ignored in Windows) | +| `DriverLog` | `on` if `CMAKE_BUILD_TYPE` is `Debug`, `off` otherwise | Enable or disable the extended driver logging | +| `DriverLogFile` | `\temp\clickhouse-odbc-driver.log` on Windows, `/tmp/clickhouse-odbc-driver.log` otherwise | Path to the extended driver log file (used when `DriverLog` is `on`) | diff --git a/content/en/altinity-kb-integrations/mysql-clickhouse.md b/content/en/altinity-kb-integrations/mysql-clickhouse.md index ad521ccd00..b4056adf3c 100644 --- a/content/en/altinity-kb-integrations/mysql-clickhouse.md +++ b/content/en/altinity-kb-integrations/mysql-clickhouse.md @@ -8,15 +8,15 @@ description: >- ### Replication using MaterializeMySQL. -- https://clickhouse.tech/docs/en/engines/database-engines/materialized-mysql/ +- https://clickhouse.com/docs/en/engines/database-engines/materialized-mysql - https://translate.google.com/translate?sl=auto&tl=en&u=https://www.jianshu.com/p/d0d4306411b3 - https://raw.githubusercontent.com/ClickHouse/clickhouse-presentations/master/meetup47/materialize_mysql.pdf -It reads mysql binlog directly and transform queries into something which clickhouse can support. Supports updates and deletes (under the hood implemented via something like ReplacingMergeTree with enforced FINAL and 'deleted' flag). Status is 'experimental', there are quite a lot of known limitations and issues, but some people use it. The original author of that went to another project, and the main team don't have a lot of resource to improve that for now (more important thing in the backlog) +It reads mysql binlog directly and transform queries into something which ClickHouse® can support. 
Supports updates and deletes (under the hood implemented via something like ReplacingMergeTree with enforced FINAL and 'deleted' flag). Status is 'experimental', there are quite a lot of known limitations and issues, but some people use it. The original author of that went to another project, and the main team don't have a lot of resource to improve that for now (more important thing in the backlog) The replication happens on the mysql database level. -### Replication using debezium + Kafka (+ Altinity Sink Connector) +### Replication using debezium + Kafka (+ Altinity Sink Connector for ClickHouse) Debezium can read the binlog and transform it to Kafka messages. @@ -24,7 +24,7 @@ You can later capture the stream of message on ClickHouse side and process it as Please remember that currently Kafka engine supports only at-least-once delivery guarantees. It's used by several companies, quite nice & flexible. But initial setup may require some efforts. -#### Altinity Sink Connector +#### Altinity Sink Connector for ClickHouse Can handle transformation of debezium messages (with support for DELETEs and UPDATEs) and exactly-once delivery for you. @@ -42,16 +42,16 @@ Have no experience / feedback there, but should be very similar to debezium. See https://altinity.com/blog/2018/6/30/realtime-mysql-clickhouse-replication-in-practice That was done long time ago in altinity for one use-case, and it seem like it was never used outside of that. -It's a python application with lot of switches which can copy a schema or read binlog from mysql and put it to clickhouse. +It's a python application with lot of switches which can copy a schema or read binlog from mysql and put it to ClickHouse. Not supported currently. But it's just a python, so maybe can be adjusted to different needs. -### Accessing MySQL data via integration engines from inside clickhouse. +### Accessing MySQL data via integration engines from inside ClickHouse. 
-MySQL [table engine](https://clickhouse.com/docs/en/engines/table-engines/integrations/mysql/) / [table function](https://clickhouse.com/docs/en/sql-reference/table-functions/mysql/), or [MySQL database engine](https://clickhouse.com/docs/en/engines/database-engines/mysql/) - clickhouse just connects to mysql server as a client, and can do normal selects. +MySQL [table engine](https://clickhouse.com/docs/en/engines/table-engines/integrations/mysql/) / [table function](https://clickhouse.com/docs/en/sql-reference/table-functions/mysql/), or [MySQL database engine](https://clickhouse.com/docs/en/engines/database-engines/mysql/) - ClickHouse just connects to mysql server as a client, and can do normal selects. We had webinar about that a year ago: https://www.youtube.com/watch?v=44kO3UzIDLI -Using that you can easily create some ETL script which will copy the data from mysql to clickhouse regularly, i.e. something like +Using that you can easily create some ETL script which will copy the data from mysql to ClickHouse regularly, i.e. something like ```sql INSERT INTO clickhouse_table SELECT * FROM mysql_table WHERE id > ... @@ -59,7 +59,7 @@ INSERT INTO clickhouse_table SELECT * FROM mysql_table WHERE id > ... Works great if you have append only table in MySQL. 
-In newer clickhouse versions you can query this was also sharded / replicated MySQL cluster - see [ExternalDistributed](https://clickhouse.com/docs/en/engines/table-engines/integrations/ExternalDistributed/) +In newer ClickHouse versions you can query this was also sharded / replicated MySQL cluster - see [ExternalDistributed](https://clickhouse.com/docs/en/engines/table-engines/integrations/ExternalDistributed/) ### MySQL dictionaries diff --git a/content/en/altinity-kb-interfaces/_index.md b/content/en/altinity-kb-interfaces/_index.md index 5fb1e32fa8..e8b5b644f7 100644 --- a/content/en/altinity-kb-interfaces/_index.md +++ b/content/en/altinity-kb-interfaces/_index.md @@ -4,6 +4,6 @@ linkTitle: "Interfaces" keywords: - clickhouse interface description: > - See the frequent questions users have about clickhouse-client. + Frequent questions users have about `clickhouse-client` weight: 9 --- diff --git a/content/en/altinity-kb-interfaces/altinity-kb-clickhouse-client.md b/content/en/altinity-kb-interfaces/altinity-kb-clickhouse-client.md index 4ffaba09a4..4346abd242 100644 --- a/content/en/altinity-kb-interfaces/altinity-kb-clickhouse-client.md +++ b/content/en/altinity-kb-interfaces/altinity-kb-clickhouse-client.md @@ -4,7 +4,7 @@ linkTitle: "clickhouse-client" keywords: - clickhouse client description: > - ClickHouse client + ClickHouse® client --- Q. How can I input multi-line SQL code? can you guys give me an example? 
@@ -50,4 +50,4 @@ Also, it’s possible to have several client config files and pass one of them t References: -* [https://clickhouse.tech/docs/en/interfaces/cli/](https://clickhouse.tech/docs/en/interfaces/cli/) +* [https://clickhouse.com/docs/en/interfaces/cli](https://clickhouse.com/docs/en/interfaces/cli) diff --git a/content/en/altinity-kb-kubernetes/_index.md b/content/en/altinity-kb-kubernetes/_index.md index d569a64ee2..01f8542a28 100644 --- a/content/en/altinity-kb-kubernetes/_index.md +++ b/content/en/altinity-kb-kubernetes/_index.md @@ -1,13 +1,13 @@ --- -title: "Kubernetes" -linkTitle: "Kubernetes" +title: "Running ClickHouse® in Kubernetes" +linkTitle: "Running ClickHouse® in Kubernetes" keywords: - clickhouse in kubernetes - kubernetes issues description: > - Run ClickHouse in Kubernetes without any issues. + Run ClickHouse® in Kubernetes without any issues. weight: 8 --- -## clickhouse-backup +## Sample YAML from the Altinity Kubernetes Operator for ClickHouse® [setup-example.yaml](https://github.com/Altinity/clickhouse-operator/blob/eb3fc4e28514d0d6ea25a40698205b02949bcf9d/docs/chi-examples/03-persistent-volume-07-do-not-chown.yaml) diff --git a/content/en/altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s.md b/content/en/altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s.md index cdfc942c2f..d1dd337b23 100644 --- a/content/en/altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s.md +++ b/content/en/altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s.md @@ -1,10 +1,10 @@ --- -title: "Possible issues with running ClickHouse in k8s" -linkTitle: "Possible issues with running ClickHouse in k8s" +title: "Possible issues with running ClickHouse® in K8s" +linkTitle: "Possible issues with running ClickHouse® in K8s" description: > - Possible issues with running ClickHouse in k8s + Possible issues with running ClickHouse® in K8s --- -The 
biggest problem with running ClickHouse in k8s, happens when clickhouse-server can't start for some reason and pod is falling in CrashloopBackOff, so you can't easily get in the pod and check/fix/restart ClickHouse. +The biggest problem with running ClickHouse® in K8s, happens when clickhouse-server can't start for some reason and pod is falling in CrashloopBackOff, so you can't easily get in the pod and check/fix/restart ClickHouse. There is multiple possible reasons for this, some of them can be fixed without manual intervention in pod: @@ -25,7 +25,7 @@ Caveats: 1. Not all configuration/state folders are being covered by persistent volumes. ([geobases](https://clickhouse.tech/docs/en/sql-reference/functions/ym-dict-functions/#multiple-geobases)) 2. Page cache belongs to k8s node and pv are being mounted to pod, in case of fast shutdown there is possibility to loss some data(needs to be clarified) -3. Some cloud providers (GKE) can have slow unlink command, which is important for clickhouse because it's needed for parts management. (`max_part_removal_threads` setting) +3. Some cloud providers (GKE) can have slow unlink command, which is important for ClickHouse because it's needed for parts management. (`max_part_removal_threads` setting) Useful commands: diff --git a/content/en/altinity-kb-schema-design/_index.md b/content/en/altinity-kb-schema-design/_index.md index 7ffb0165c0..d59eb63d6e 100644 --- a/content/en/altinity-kb-schema-design/_index.md +++ b/content/en/altinity-kb-schema-design/_index.md @@ -6,6 +6,6 @@ keywords: - clickhouse lowcardinality - clickhouse materialized view description: > - All you need to know about ClickHouse schema design, including materialized view, limitations, lowcardinality, codecs. + All you need to know about ClickHouse® schema design, including materialized view, limitations, lowcardinality, codecs. 
weight: 7 --- diff --git a/content/en/altinity-kb-schema-design/altinity-kb-dictionaries-vs-lowcardinality.md b/content/en/altinity-kb-schema-design/altinity-kb-dictionaries-vs-lowcardinality.md index 5593b87658..a3b9e7ef26 100644 --- a/content/en/altinity-kb-schema-design/altinity-kb-dictionaries-vs-lowcardinality.md +++ b/content/en/altinity-kb-schema-design/altinity-kb-dictionaries-vs-lowcardinality.md @@ -16,4 +16,4 @@ From the other hand: if data can be changed in future, and that change should im For example if you need to change the used currency rare every day- it would be quite stupid to update all historical records to apply the newest exchange rate. And putting it to dict will allow to do calculations with latest exchange rate at select time. -For dictionary it's possible to mark some of the attributes as injective. An attribute is called injective if different attribute values correspond to different keys. It would allow ClickHouse to replace dictGet call in GROUP BY with cheap dict key. +For dictionary it's possible to mark some of the attributes as injective. An attribute is called injective if different attribute values correspond to different keys. It would allow ClickHouse® to replace dictGet call in GROUP BY with cheap dict key. diff --git a/content/en/altinity-kb-schema-design/altinity-kb-jsoneachrow-tuples-and-mvs.md b/content/en/altinity-kb-schema-design/altinity-kb-jsoneachrow-tuples-and-mvs.md index f6488275f0..b17fa42788 100644 --- a/content/en/altinity-kb-schema-design/altinity-kb-jsoneachrow-tuples-and-mvs.md +++ b/content/en/altinity-kb-schema-design/altinity-kb-jsoneachrow-tuples-and-mvs.md @@ -64,7 +64,7 @@ FROM nest_tuple_destination Some hints: -- 💡 Beware of column names in ClickHouse they are Case sensitive. If a JSON message has the key names in Capitals, the Kafka/Source table should have the same column names in Capitals. +- 💡 Beware of column names in ClickHouse® they are Case sensitive. 
If a JSON message has the key names in Capitals, the Kafka/Source table should have the same column names in Capitals. - 💡 Also this `Tuple()` approach is not for Dynamic json schemas as explained above. In the case of having a dynamic schema, use the classic approach using `JSONExtract` set of functions. If the schema is fixed, you can use `Tuple()` for `JSONEachRow` format but you need to use classic tuple notation (using index reference) inside the MV, because using named tuples inside the MV won't work: diff --git a/content/en/altinity-kb-schema-design/backfill_column.md b/content/en/altinity-kb-schema-design/backfill_column.md index 44640e96da..0d1ff01b96 100644 --- a/content/en/altinity-kb-schema-design/backfill_column.md +++ b/content/en/altinity-kb-schema-design/backfill_column.md @@ -14,7 +14,7 @@ Sometimes you need to add a column into a huge table and backfill it with a data {{% alert title="Replicated setup" color="info" %}} In case of a replicated / sharded setup you need to have the dictionary and source table (dict_table / item_dict) on all nodes and they have to all have EXACTLY the same data. The easiest way to do this is to make dict_table replicated. -In this case, you will need to set the setting `allow_nondeterministic_mutations=1` on the user that runs the `ALTER TABLE`. See the [ClickHouse docs](https://clickhouse.com/docs/en/operations/settings/settings#allow_nondeterministic_mutations) for more information about this setting. +In this case, you will need to set the setting `allow_nondeterministic_mutations=1` on the user that runs the `ALTER TABLE`. See the [ClickHouse® docs](https://clickhouse.com/docs/en/operations/settings/settings#allow_nondeterministic_mutations) for more information about this setting. 
{{% /alert %}} diff --git a/content/en/altinity-kb-schema-design/best-schema-for-storing-many-metrics-registered-from-the-single-source.md b/content/en/altinity-kb-schema-design/best-schema-for-storing-many-metrics-registered-from-the-single-source.md index 37e709769c..aab6460b5f 100644 --- a/content/en/altinity-kb-schema-design/best-schema-for-storing-many-metrics-registered-from-the-single-source.md +++ b/content/en/altinity-kb-schema-design/best-schema-for-storing-many-metrics-registered-from-the-single-source.md @@ -72,7 +72,7 @@ Pros and cons: * easy to extend, you can have very dynamic / huge number of metrics. * the only option to store hierarchical / complicated data structures, also with arrays etc. inside. * good for sparse recording (each time point can have only 1% of all the possible metrics) - * ClickHouse has efficient API to work with JSON + * ClickHouse® has efficient API to work with JSON * nice if your data originally came in JSON (don't need to reformat) * Cons * uses storage non efficiently @@ -88,7 +88,7 @@ Same pros/cons as raw JSON, but usually bit more compact than JSON Pros and cons: * Pros - * clickhouse has efficient API to work with URLs (extractURLParameter etc) + * ClickHouse has efficient API to work with URLs (extractURLParameter etc) * can have sense if you data came in such format (i.e. you can store GET / POST request data directly w/o reprocessing) * Cons * slower than arrays diff --git a/content/en/altinity-kb-schema-design/codecs/_index.md b/content/en/altinity-kb-schema-design/codecs/_index.md index 75a70ca0b8..ea8c9846e3 100644 --- a/content/en/altinity-kb-schema-design/codecs/_index.md +++ b/content/en/altinity-kb-schema-design/codecs/_index.md @@ -15,7 +15,7 @@ description: > | Gorilla | Floating Point Types | Calculates XOR between current and previous value; suitable for slowly changing numbers | | T64 | Integer, Time Series Data, Timestamps | Preprocessor (should be followed by some compression codec). 
Crops unused high bits; puts them into a 64x64 bit matrix; optimized for 64-bit data types | | GCD | Integer Numbers | Preprocessor (should be followed by some compression codec). Greatest common divisor compression; divides values by a common divisor; effective for divisible integer sequences | -| FPC | Floating Point Numbers | Designed for Float64; Algorithm detailed in [FPC paper](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf), [ClickHouse PR #37553](https://github.com/ClickHouse/ClickHouse/pull/37553) | +| FPC | Floating Point Numbers | Designed for Float64; Algorithm detailed in [FPC paper](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf), [ClickHouse® PR #37553](https://github.com/ClickHouse/ClickHouse/pull/37553) | | ZSTD_QAT | Any | Requires hardware support for QuickAssist Technology (QAT) hardware; provides accelerated compression tasks | | DEFLATE_QPL | Any | Requires hardware support for Intel’s QuickAssist Technology for DEFLATE compression; enhanced performance for specific hardware | | LowCardinality | String | It's not a codec, but a datatype modifier. Reduces representation size; effective for columns with low cardinality | diff --git a/content/en/altinity-kb-schema-design/floats-vs-decimals.md b/content/en/altinity-kb-schema-design/floats-vs-decimals.md index ddb8683916..bb5ed7eb21 100644 --- a/content/en/altinity-kb-schema-design/floats-vs-decimals.md +++ b/content/en/altinity-kb-schema-design/floats-vs-decimals.md @@ -43,7 +43,7 @@ SELECT (toDecimal64(100000000000000000., 1) - toDecimal64(100000000000000000., 1 ``` {{% alert title="Warning" color="warning" %}} -Because clickhouse uses MPP order of execution of a single query can vary on each run, and you can get slightly different results from the float column every time you run the query. +Because ClickHouse® uses MPP order of execution of a single query can vary on each run, and you can get slightly different results from the float column every time you run the query. 
Usually, this deviation is small, but it can be significant when some kind of arithmetic operation is performed on very large and very small numbers at the same time. {{% /alert %}} diff --git a/content/en/altinity-kb-schema-design/how-much-is-too-much.md b/content/en/altinity-kb-schema-design/how-much-is-too-much.md index 6c0f3054a3..7c125ccf48 100644 --- a/content/en/altinity-kb-schema-design/how-much-is-too-much.md +++ b/content/en/altinity-kb-schema-design/how-much-is-too-much.md @@ -3,12 +3,12 @@ title: "How much is too much?" linkTitle: "How much is too much?" weight: 100 description: >- - ClickHouse Limitations. + ClickHouse® Limitations --- ## How much is too much? -In most of the cases clickhouse don't have any hard limits. But obsiously there there are some practical limitation / barriers for different things - often they are caused by some system / network / filesystem limitation. +In most of the cases ClickHouse® don't have any hard limits. But obsiously there there are some practical limitation / barriers for different things - often they are caused by some system / network / filesystem limitation. So after reaching some limits you can get different kind of problems, usually it never a failures / errors, but different kinds of degradations (slower queries / high cpu/memory usage, extra load on the network / zookeeper etc). @@ -26,7 +26,7 @@ Fewer than number of tables (above). Dozens / hundreds is usually still acceptab ### Number of inserts per seconds -For usual (non async) inserts - dozen is enough. Every insert creates a part, if you will create parts too often, clickhouse will not be able to merge them and you will be getting 'too many parts'. +For usual (non async) inserts - dozen is enough. Every insert creates a part, if you will create parts too often, ClickHouse will not be able to merge them and you will be getting 'too many parts'. 
### Number of columns in the table diff --git a/content/en/altinity-kb-schema-design/ingestion-aggregate-function.md b/content/en/altinity-kb-schema-design/ingestion-aggregate-function.md index a9fea96a17..a9158a7422 100644 --- a/content/en/altinity-kb-schema-design/ingestion-aggregate-function.md +++ b/content/en/altinity-kb-schema-design/ingestion-aggregate-function.md @@ -3,7 +3,7 @@ title: "Ingestion of AggregateFunction" linkTitle: "Ingestion of AggregateFunction" weight: 100 description: >- - ClickHouse. How to insert AggregateFunction data. + ClickHouse® - How to insert AggregateFunction data --- ## How to insert AggregateFunction data diff --git a/content/en/altinity-kb-schema-design/insert_deduplication.md b/content/en/altinity-kb-schema-design/insert_deduplication.md index 84a145a187..ea24fa16ba 100644 --- a/content/en/altinity-kb-schema-design/insert_deduplication.md +++ b/content/en/altinity-kb-schema-design/insert_deduplication.md @@ -244,7 +244,7 @@ select * from test_insert format PrettyCompactMonoBlock; ## insert_deduplication_token -Since Clikhouse 22.2 there is a new setting [insert_dedupplication_token](https://clickhouse.com/docs/en/operations/settings/settings/#insert_deduplication_token). +Since ClickHouse® 22.2 there is a new setting [insert_dedupplication_token](https://clickhouse.com/docs/en/operations/settings/settings/#insert_deduplication_token). This setting allows you to define an explicit token that will be used for deduplication instead of calculating a checksum from the inserted data. 
```sql diff --git a/content/en/altinity-kb-schema-design/materialized-views/_index.md b/content/en/altinity-kb-schema-design/materialized-views/_index.md index a92f6dd86b..a7d0579758 100644 --- a/content/en/altinity-kb-schema-design/materialized-views/_index.md +++ b/content/en/altinity-kb-schema-design/materialized-views/_index.md @@ -5,7 +5,7 @@ description: > MATERIALIZED VIEWS --- {{% alert title="Info" color="info" %}} -MATERIALIZED VIEWs in ClickHouse behave like AFTER INSERT TRIGGER to the left-most table listed in its SELECT statement. +MATERIALIZED VIEWs in ClickHouse® behave like AFTER INSERT TRIGGER to the left-most table listed in its SELECT statement. {{% /alert %}} diff --git a/content/en/altinity-kb-schema-design/materialized-views/backfill-populate-mv-in-a-controlled-manner.md b/content/en/altinity-kb-schema-design/materialized-views/backfill-populate-mv-in-a-controlled-manner.md index cb253d5f3a..92dfa67e9b 100644 --- a/content/en/altinity-kb-schema-design/materialized-views/backfill-populate-mv-in-a-controlled-manner.md +++ b/content/en/altinity-kb-schema-design/materialized-views/backfill-populate-mv-in-a-controlled-manner.md @@ -24,7 +24,7 @@ INSERT INTO mv_import SELECT * FROM huge_table WHERE toYYYYMM(ts) = 202105; ALTER TABLE mv ATTACH PARTITION ID '202105' FROM mv_import; ``` -See also [https://clickhouse.tech/docs/en/sql-reference/statements/alter/partition/\#alter_attach-partition-from](https://clickhouse.tech/docs/en/sql-reference/statements/alter/partition/\#alter_attach-partition-from). +See also [the ClickHouse® documentation on Manipulating Partitions and Parts](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition). Q. I still do not have enough RAM to GROUP BY the whole partition. 
diff --git a/content/en/altinity-kb-schema-design/materialized-views/idempotent_inserts_mv.md b/content/en/altinity-kb-schema-design/materialized-views/idempotent_inserts_mv.md index 901e3fb0d6..1c31a5a94a 100644 --- a/content/en/altinity-kb-schema-design/materialized-views/idempotent_inserts_mv.md +++ b/content/en/altinity-kb-schema-design/materialized-views/idempotent_inserts_mv.md @@ -8,7 +8,7 @@ description: >- ## Why inserts into materialized views are not idempotent? -ClickHouse still does not have transactions. They will be implemented around 2022Q2. +ClickHouse® still does not have transactions. They will be implemented around 2022Q2. Because of ClickHouse materialized view is a trigger. And an insert into a table and an insert into a subordinate materialized view it's two different inserts so they are not atomic alltogether. diff --git a/content/en/altinity-kb-schema-design/preaggregations.md b/content/en/altinity-kb-schema-design/preaggregations.md index 3c1e96ac56..c36fee7101 100644 --- a/content/en/altinity-kb-schema-design/preaggregations.md +++ b/content/en/altinity-kb-schema-design/preaggregations.md @@ -3,7 +3,7 @@ title: "Pre-Aggregation approaches" linkTitle: "Pre-Aggregation approaches" weight: 100 description: >- - ETL vs Materialized Views vs Projections in ClickHouse. 
+ ETL vs Materialized Views vs Projections in ClickHouse® --- ## Pre-Aggregation approaches: ETL vs Materialized Views vs Projections diff --git a/content/en/altinity-kb-schema-design/row-level-deduplication.md b/content/en/altinity-kb-schema-design/row-level-deduplication.md index 2f26edac6b..c9556f6d9d 100644 --- a/content/en/altinity-kb-schema-design/row-level-deduplication.md +++ b/content/en/altinity-kb-schema-design/row-level-deduplication.md @@ -1,12 +1,12 @@ --- -title: "ClickHouse row-level deduplication" -linkTitle: "ClickHouse row-level deduplication" +title: "ClickHouse® row-level deduplication" +linkTitle: "ClickHouse® row-level deduplication" weight: 100 description: >- - ClickHouse row-level deduplication. + ClickHouse® row-level deduplication. --- -## ClickHouse row-level deduplication. +## ClickHouse® row-level deduplication. (Block level deduplication exists in Replicated tables, and is not the subject of that article). @@ -26,7 +26,7 @@ Approach 0. Make deduplication before ingesting data to ClickHouse + you have full control - extra coding and 'moving parts', storing some ids somewhere + clean and simple schema and selects in ClickHouse -! check if row exists in clickhouse before insert can give non-satisfing results if you use ClickHouse cluster (i.e. Replicated / Distributed tables) - due to eventual consistency. +! check if row exists in ClickHouse before insert can give non-satisfing results if you use ClickHouse cluster (i.e. Replicated / Distributed tables) - due to eventual consistency. Approach 1. Allow duplicates during ingestion. Remove them on SELECT level (by things like GROUP BY) + simple inserts @@ -44,7 +44,7 @@ Approach 2. Eventual deduplication using Replacing Approach 3. 
Eventual deduplication using Collapsing - complicated - can force you to use suboptimal primary key (which will guarantee record uniqueness) - - you need to store previous state of the record somewhere, or extract it before ingestion from clickhouse + - you need to store previous state of the record somewhere, or extract it before ingestion from ClickHouse - deduplication is eventual (same as with Replacing) + you can make the proper aggregations of last state w/o FINAL (bookkeeping-alike sums, counts etc) diff --git a/content/en/altinity-kb-schema-design/two-columns-indexing.md b/content/en/altinity-kb-schema-design/two-columns-indexing.md index 34145a0f4d..1b5f7bc6e2 100644 --- a/content/en/altinity-kb-schema-design/two-columns-indexing.md +++ b/content/en/altinity-kb-schema-design/two-columns-indexing.md @@ -22,7 +22,7 @@ select * from X where B = '0123456789' and ts between ...; and both A & B are high-cardinality values -ClickHouse primary skip index (ORDER BY/PRIMARY KEY) work great when you always include leading ORDER BY columns in WHERE filter. There is an exceptions for low-cardinality columns and high-correlated values, but here is another case. A & B both high cardinality and seems that their correlation is at medium level. +ClickHouse® primary skip index (ORDER BY/PRIMARY KEY) work great when you always include leading ORDER BY columns in WHERE filter. There is an exceptions for low-cardinality columns and high-correlated values, but here is another case. A & B both high cardinality and seems that their correlation is at medium level. Various solutions exist, and their effectiveness largely depends on the correlation of different column data. It is necessary to test all solutions on actual data to select the best one. 
diff --git a/content/en/altinity-kb-setup-and-maintenance/_index.md b/content/en/altinity-kb-setup-and-maintenance/_index.md index 1c3df79b50..5184ed2241 100644 --- a/content/en/altinity-kb-setup-and-maintenance/_index.md +++ b/content/en/altinity-kb-setup-and-maintenance/_index.md @@ -7,6 +7,6 @@ keywords: - monitor clickhouse - data migration description: > - Learn how to set up, deploy, monitor, and backup ClickHouse with step-by-step guides. + Learn how to set up, deploy, monitor, and backup ClickHouse® with step-by-step guides. weight: 5 --- diff --git a/content/en/altinity-kb-setup-and-maintenance/alters.md b/content/en/altinity-kb-setup-and-maintenance/alters.md index f0e5f22a32..6661e8ca81 100644 --- a/content/en/altinity-kb-setup-and-maintenance/alters.md +++ b/content/en/altinity-kb-setup-and-maintenance/alters.md @@ -1,12 +1,12 @@ --- -title: "How ALTER's works in ClickHouse" -linkTitle: "How ALTER's works in ClickHouse" +title: "How ALTERs work in ClickHouse®" +linkTitle: "How ALTERs work in ClickHouse®" weight: 100 description: >- --- -### How ALTER's works in ClickHouse: +### How ALTERs work in ClickHouse®: #### ADD (COLUMN/INDEX/PROJECTION) diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-aggressive_merges.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-aggressive_merges.md index c1daf437d8..ead7d010a0 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-aggressive_merges.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-aggressive_merges.md @@ -7,7 +7,7 @@ description: > -Q: Is there any way I can dedicate more resources to the merging process when running ClickHouse on pretty beefy machines (like 36 cores, 1TB of RAM, and large NVMe disks)? +Q: Is there any way I can dedicate more resources to the merging process when running ClickHouse® on pretty beefy machines (like 36 cores, 1TB of RAM, and large NVMe disks)? 
Mostly such things doing by changing the level of parallelism: @@ -28,7 +28,7 @@ Additionally, you can: - review the schema, especially codes/compression used (they allow to reduce the size, but often can impact the merge speed significantly). - try to form bigger parts when doing inserts (min_insert_block_size_bytes / min_insert_block_size_rows / max_insert_block_size) - check if wide (every column in a separate file) or compact (columns are mixed in one file) parts are used (system.parts). By default min_bytes_for_wide_part=10 mln rows (so if the part is bigger that that the wide format will be used, compact otherwise). Sometimes it can be beneficial to use a compact format even for bigger parts (a lot of relatively small columns) or oppositely - use a wide format even for small parts (few fat columns in the table). - - consider using recent clickhouse releases - they use compressed marks by default, which can be beneficial for reducing the i/o + - consider using recent ClickHouse releases - they use compressed marks by default, which can be beneficial for reducing the i/o All the adjustments/performance optimizations should be controlled by some reproducible 'benchmark' so you can control/prove that the change gives the expected result (sometimes it's quite hard to predict the impact of some change on the real system). Please also monitors how system resources (especially CPU, IO + for replicated tables: network & zookeeper) are used/saturated during the test. 
Also monitor/plot the usage of the pools: ``` diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md index 522339740a..9b07d7175e 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-check-replication-ddl-queue.md @@ -46,7 +46,7 @@ WHERE ## Common problems & solutions -- If the replication queue does not have any Exceptions only postponed reasons without exceptions just leave ClickHouse do Merges/Mutations and it will eventually catch up and reduce the number of tasks in `replication_queue`. Number of concurrent merges and fetches can be tuned but if it is done without an analysis of your workload then you may end up in a worse situation. If Delay in queue is going up actions may be needed: +- If the replication queue does not have any Exceptions only postponed reasons without exceptions just leave ClickHouse® do Merges/Mutations and it will eventually catch up and reduce the number of tasks in `replication_queue`. Number of concurrent merges and fetches can be tuned but if it is done without an analysis of your workload then you may end up in a worse situation. 
If Delay in queue is going up actions may be needed: - First simplest approach: - try to `SYSTEM RESTART REPLICA db.table` (This will DETACH/ATTACH table internally) diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-clickhouse-in-docker.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-clickhouse-in-docker.md index cf6bc5bbaf..4adc6df5e3 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-clickhouse-in-docker.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-clickhouse-in-docker.md @@ -1,8 +1,8 @@ --- -title: "ClickHouse in Docker" -linkTitle: "ClickHouse in Docker" +title: "ClickHouse® in Docker" +linkTitle: "ClickHouse® in Docker" description: > - ClickHouse in Docker + ClickHouse® in Docker --- ## Do you have documentation on Docker deployments? @@ -21,7 +21,7 @@ Important things: * Also, you may mount in some files or folders in the configuration folder: * `/etc/clickhouse-server/config.d/listen_ports.xml` * `--ulimit nofile=262144:262144` -* You can also set on some linux capabilities to enable some of extra features of ClickHouse (not obligatory): `SYS_PTRACE NET_ADMIN IPC_LOCK SYS_NICE` +* You can also set on some linux capabilities to enable some of extra features of ClickHouse® (not obligatory): `SYS_PTRACE NET_ADMIN IPC_LOCK SYS_NICE` * you may also mount in the folder `/docker-entrypoint-initdb.d/` - all SQL or bash scripts there will be executed during container startup. 
* if you use cgroup limits - it may misbehave https://github.com/ClickHouse/ClickHouse/issues/2261 (set up `` manually) * there are several ENV switches, see: [https://github.com/ClickHouse/ClickHouse/blob/master/docker/server/entrypoint.sh](https://github.com/ClickHouse/ClickHouse/blob/master/docker/server/entrypoint.sh) diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md index b0591084aa..d8ae3e65b8 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated.md @@ -14,7 +14,7 @@ Options here are: ## example for option 2 -Note: ATTACH PARTITION ID 'bar' FROM 'foo'` is practically free from compute and disk space perspective. This feature utilizes filesystem hard-links and the fact that files are immutable in ClickHouse ( it's the core of the ClickHouse design, filesystem hard-links and such file manipulations are widely used ). +Note: ATTACH PARTITION ID 'bar' FROM 'foo'` is practically free from compute and disk space perspective. This feature utilizes filesystem hard-links and the fact that files are immutable in ClickHouse® ( it's the core of the ClickHouse design, filesystem hard-links and such file manipulations are widely used ). 
```sql create table foo( A Int64, D Date, S String ) diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md index 9b211b320f..17e00be60a 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md @@ -11,11 +11,11 @@ Pros: Cons: * Decoding & encoding of common data formats may be slower / require more CPU -* The data size is usually bigger than ClickHouse formats. +* The data size is usually bigger than ClickHouse® formats. * Some of the common data formats have limitations. {{% alert title="Info" color="info" %}} -The best approach to do that is using clickhouse-client, in that case, encoding/decoding of format happens client-side, while client and server speak clickhouse Native format (columnar & compressed). +The best approach to do that is using `clickhouse-client`, in that case, encoding/decoding of format happens client-side, while client and server speak ClickHouse Native format (columnar & compressed). In contrast: when you use HTTP protocol, the server do encoding/decoding and more data is passed between client and server. {{% /alert %}} @@ -56,7 +56,7 @@ Internally it works like smart `INSERT INTO cluster(…) SELECT * FROM ...` with {{% /alert %}} {{% alert title="Info" color="info" %}} -Run clickhouse copier on the same nodes as receiver clickhouse, to avoid doubling the network load. +Run `clickhouse copier` on the same nodes as receiver ClickHouse, to avoid doubling the network load. {{% /alert %}} See details in: @@ -91,7 +91,7 @@ Cons: Just create the backup on server 1, upload it to server 2, and restore the backup. 
-See [https://github.com/AlexAkulov/clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup) +See [the Altinity Backup for ClickHouse repo](https://github.com/Altinity/clickhouse-backup) for the code and the details. [https://altinity.com/blog/introduction-to-clickhouse-backups-and-clickhouse-backup](https://altinity.com/blog/introduction-to-clickhouse-backups-and-clickhouse-backup) @@ -102,7 +102,7 @@ Pros: Cons: * Table schema should be the same. -* Works only when the source and the destination clickhouse servers share the same zookeeper (without chroot) +* Works only when the source and the destination ClickHouse servers share the same zookeeper (without chroot) * Needs to access zookeeper and ClickHouse replication ports: (`interserver_http_port` or `interserver_https_port`) ```sql diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/add_remove_replica.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/add_remove_replica.md index 7d56f6c5d0..2f9b8eb46d 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/add_remove_replica.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/add_remove_replica.md @@ -1,11 +1,11 @@ --- -title: "Add/Remove a new replica to a ClickHouse cluster" +title: "Add/Remove a new replica to a ClickHouse® cluster" linkTitle: "add_remove_replica" description: > How to add/remove a new replica manually and using clickhouse-backup --- -## ADD nodes/Replicas to a Cluster +## ADD nodes/replicas to a ClickHouse® cluster To add some replicas to an existing cluster if -30TB then better to use replication: @@ -92,9 +92,9 @@ clickhouse-client --host localhost --port 9000 -mn < databases.sql clickhouse-client --host localhost --port 9000 -mn < schema.sql ``` -### Using clickhouse-backup +### Using `clickhouse-backup` -- Using clickhouse-backup to copy the schema of a replica to another is also convenient and moreover if 
using Atomic database with `{uuid}` macros in ReplicatedMergeTree engines: +- Using `clickhouse-backup` to copy the schema of a replica to another is also convenient and moreover if using Atomic database with `{uuid}` macros in ReplicatedMergeTree engines: ```bash sudo -u clickhouse clickhouse-backup --schema --rbac create_remote full-replica diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/_index.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/_index.md index 75adb326bb..6f45314a1c 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/_index.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/_index.md @@ -4,11 +4,11 @@ linkTitle: "clickhouse-copier" description: > clickhouse-copier --- -The description of the utility and its parameters, as well as examples of the config files that you need to create for the copier are in the official doc [ClickHouse copier utility](https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/) +The description of the utility and its parameters, as well as examples of the config files that you need to create for the copier are in the official doc [ClickHouse® copier utility](https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/) The steps to run a task: -1. Create a config file for clickhouse-copier (zookeeper.xml) +1. Create a config file for `clickhouse-copier` (zookeeper.xml) [ZooKeeper config format](https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/#format-of-zookeeper-xml) @@ -16,7 +16,7 @@ The steps to run a task: [Copy task configuration](https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/#configuration-of-copying-tasks) -3. Create the task in ZooKeeper and start an instance of clickhouse-copier +3. 
Create the task in ZooKeeper and start an instance of `clickhouse-copier` `clickhouse-copier --daemon --base-dir=/opt/clickhouse-copier --config=/opt/clickhouse-copier/zookeeper.xml --task-path=/clickhouse/copier/task1 --task-file=/opt/clickhouse-copier/task1.xml` @@ -24,7 +24,7 @@ The steps to run a task: `clickhouse-copier --daemon --base-dir=/opt/clickhouse-copier --config=/opt/clickhouse-copier/zookeeper.xml --task-path=/clickhouse/copier/task1 --task-file=/opt/clickhouse-copier/task1.xml --task-upload-force=1` - If you want to run another instance of clickhouse-copier for the same task, you need to copy the config file (zookeeper.xml) to another server, and run this command: + If you want to run another instance of `clickhouse-copier` for the same task, you need to copy the config file (zookeeper.xml) to another server, and run this command: `clickhouse-copier --daemon --base-dir=/opt/clickhouse-copier --config=/opt/clickhouse-copier/zookeeper.xml --task-path=/clickhouse/copier/task1` diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-kubernetes-job.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-kubernetes-job.md index d5925bf5e0..ba638f5cf2 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-kubernetes-job.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/altinity-kb-clickhouse-copier/altinity-kb-clickhouse-copier-kubernetes-job.md @@ -2,9 +2,9 @@ title: "Kubernetes job for clickhouse-copier" linkTitle: "Kubernetes job for clickhouse-copier" description: > - Kubernetes job for clickhouse-copier + Kubernetes job for `clickhouse-copier` --- -# ClickHouse-copier deployment in kubernetes +# `clickhouse-copier` deployment in kubernetes `clickhouse-copier` can be 
deployed in a kubernetes environment to automate some simple backups or copy fresh data between clusters. @@ -166,13 +166,13 @@ and deploy: kubectl -n clickhouse-copier create -f ./kubernetes/copier-configmap.yaml ``` -The ```task01.xml``` file has many parameters to take into account explained in the [clickhouse-copier documentation](https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/). Important to note that it is needed a FQDN for the Zookeeper nodes and ClickHouse server that are valid for the cluster. As the deployment creates a new namespace, it is recommended to use a FQDN linked to a service. For example ```zookeeper01.svc.cluster.local```. This file should be adapted to both clusters topologies and to the needs of the user. +The ```task01.xml``` file has many parameters to take into account explained in the [clickhouse-copier documentation](https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/). Important to note that it is needed a FQDN for the Zookeeper nodes and ClickHouse® server that are valid for the cluster. As the deployment creates a new namespace, it is recommended to use a FQDN linked to a service. For example ```zookeeper01.svc.cluster.local```. This file should be adapted to both clusters topologies and to the needs of the user. The ```zookeeper.xml``` file is pretty straightforward with a simple 3 node ensemble configuration. ### 3) Create the job: -Basically the job will download the official clickhouse image and will create a pod with 2 containers: +Basically the job will download the official ClickHouse image and will create a pod with 2 containers: - clickhouse-copier: This container will run the clickhouse-copier utility. 
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/distributed-table-cluster.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/distributed-table-cluster.md index 8f49493707..95e891f793 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/distributed-table-cluster.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/distributed-table-cluster.md @@ -5,7 +5,7 @@ description: > Distributed table to cluster --- -In order to shift INSERTS to a standby cluster (for example increase zone availability or disaster recovery) some ClickHouse features can be used. +In order to shift INSERTS to a standby cluster (for example increase zone availability or disaster recovery) some ClickHouse® features can be used. Basically we need to create a distributed table, a MV, rewrite the `remote_servers.xml` config file and tune some parameters. diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/mssql-clickhouse.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/mssql-clickhouse.md index 7ddefa753f..4e4d37cf74 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/mssql-clickhouse.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/mssql-clickhouse.md @@ -1,12 +1,12 @@ --- title: "MSSQL bcp pipe to clickhouse-client" -linkTitle: "Export from MSSQL to ClickHouse" +linkTitle: "Export from MSSQL to ClickHouse®" weight: 100 description: >- - Export from MSSQL to ClickHouse + Export from MSSQL to ClickHouse® --- -## How to pipe data from bcp export tool for MSSQL database +## How to pipe data to ClickHouse® from bcp export tool for MSSQL database ### Prepare tables diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/remote-table-function.md 
b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/remote-table-function.md index 3f9446be46..3c4b39c300 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/remote-table-function.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/remote-table-function.md @@ -33,7 +33,7 @@ While of course it should be checked, every case is different. Yes, by the cost of extra memory usage (on the receiver side). -ClickHouse tries to form blocks of data in memory and while one of limit: `min_insert_block_size_rows` or `min_insert_block_size_bytes` being hit, ClickHouse dump this block on disk. If ClickHouse tries to execute insert in parallel (`max_insert_threads > 1`), it would form multiple blocks at one time. +ClickHouse® tries to form blocks of data in memory and while one of limit: `min_insert_block_size_rows` or `min_insert_block_size_bytes` being hit, ClickHouse dump this block on disk. If ClickHouse tries to execute insert in parallel (`max_insert_threads > 1`), it would form multiple blocks at one time. So maximum memory usage can be calculated like this: `max_insert_threads * first(min_insert_block_size_rows OR min_insert_block_size_bytes)` Default values: diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/rsync.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/rsync.md index 4094148b4c..8e0434074f 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/rsync.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/rsync.md @@ -6,7 +6,7 @@ description: > --- ### Short Instructions -These instructions apply to ClickHouse using default locations for storage. +These instructions apply to ClickHouse® using default locations for storage. 1. Do [FREEZE TABLE](https://clickhouse.tech/docs/en/sql-reference/statements/alter/partition/#alter_freeze-partition) on needed table, partition. 
It produces a consistent snapshot of table data. 2. Run rsync command. diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/_index.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/_index.md index 052701a190..719b0782e0 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/_index.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/_index.md @@ -4,11 +4,11 @@ linkTitle: "DDLWorker" description: > DDLWorker --- -DDLWorker is a subprocess (thread) of clickhouse-server that executes `ON CLUSTER` tasks at the node. +DDLWorker is a subprocess (thread) of `clickhouse-server` that executes `ON CLUSTER` tasks at the node. When you execute a DDL query with `ON CLUSTER mycluster` section the query executor at the current node reads the cluster `mycluster` definition (remote_servers / system.clusters) and places tasks into Zookeeper znode `task_queue/ddl/...` for members of the cluster `mycluster`. -DDLWorker at all ClickHouse nodes constantly check this `task_queue` for their tasks and executes them locally and reports about a result back into `task_queue`. +DDLWorker at all ClickHouse® nodes constantly check this `task_queue` for their tasks and executes them locally and reports about a result back into `task_queue`. The common issue is the different hostnames/IPAddresses in the cluster definition and locally. 
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/there-are-n-unfinished-hosts-0-of-them-are-currently-active.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/there-are-n-unfinished-hosts-0-of-them-are-currently-active.md index 6b2a655f16..9fec259ba0 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/there-are-n-unfinished-hosts-0-of-them-are-currently-active.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-ddlworker/there-are-n-unfinished-hosts-0-of-them-are-currently-active.md @@ -2,13 +2,13 @@ title: "There are N unfinished hosts (0 of them are currently active)." linkTitle: "There are N unfinished hosts (0 of them are currently active)." description: > - "There are N unfinished hosts (0 of them are currently active)." + There are N unfinished hosts (0 of them are currently active). --- Sometimes your Distributed DDL queries are being stuck, and not executing on all or subset of nodes, there are a lot of possible reasons for that kind of behavior, so it would take some time and effort to investigate. ## Possible reasons -### ClickHouse node can't recognize itself +### ClickHouse® node can't recognize itself ```sql SELECT * FROM system.clusters; -- check is_local column, it should have 1 for itself @@ -24,7 +24,7 @@ cat /etc/hostname ### Debian / Ubuntu -There is an issue in Debian based images, when hostname being mapped to 127.0.1.1 address which doesn't literally match network interface and clickhouse fails to detect this address as local. +There is an issue in Debian based images, when hostname being mapped to 127.0.1.1 address which doesn't literally match network interface and ClickHouse fails to detect this address as local. 
[https://github.com/ClickHouse/ClickHouse/issues/23504](https://github.com/ClickHouse/ClickHouse/issues/23504) diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-memory-overcommit.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-memory-overcommit.md index c39ccd1b27..0457408453 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-memory-overcommit.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-memory-overcommit.md @@ -7,7 +7,7 @@ description: > ## Memory Overcommiter -From version 22.2+ [ClickHouse was updated with enhanced Memory overcommit capabilities](https://github.com/ClickHouse/ClickHouse/pull/31182). In the past, queries were constrained by the `max_memory_usage` setting, imposing a rigid limitation. Users had the option to increase this limit, but it came at the potential expense of impacting other users during a single query. With the introduction of Memory overcommit, more memory-intensive queries can now execute, granted there are ample resources available. When the [server reaches its maximum memory limit](https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings#max_server_memory_usage), ClickHouse identifies the most overcommitted queries and attempts to terminate them. It's important to note that the terminated query might not be the one causing the condition. If it's not, the query will undergo a waiting period to allow the termination of the high-memory query before resuming its execution. This setup ensures that low-memory queries always have the opportunity to run, while more resource-intensive queries can execute during server idle times when resources are abundant. Users have the flexibility to fine-tune this behavior at both the server and user levels. +From version 22.2+ [ClickHouse® was updated with enhanced Memory overcommit capabilities](https://github.com/ClickHouse/ClickHouse/pull/31182). 
In the past, queries were constrained by the `max_memory_usage` setting, imposing a rigid limitation. Users had the option to increase this limit, but it came at the potential expense of impacting other users during a single query. With the introduction of Memory overcommit, more memory-intensive queries can now execute, granted there are ample resources available. When the [server reaches its maximum memory limit](https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings#max_server_memory_usage), ClickHouse identifies the most overcommitted queries and attempts to terminate them. It's important to note that the terminated query might not be the one causing the condition. If it's not, the query will undergo a waiting period to allow the termination of the high-memory query before resuming its execution. This setup ensures that low-memory queries always have the opportunity to run, while more resource-intensive queries can execute during server idle times when resources are abundant. Users have the flexibility to fine-tune this behavior at both the server and user levels. If the memory overcommitter is not being used you'll get something like this: diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-monitoring.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-monitoring.md index e20f062490..074f2287ed 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-monitoring.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-monitoring.md @@ -1,11 +1,11 @@ --- -title: "ClickHouse Monitoring" -linkTitle: "ClickHouse Monitoring" +title: "ClickHouse® Monitoring" +linkTitle: "ClickHouse® Monitoring" description: > - ClickHouse Monitoring + ClickHouse® Monitoring --- -## ClickHouse Monitoring +## ClickHouse® Monitoring Monitoring helps to track potential issues in your cluster before they cause a critical error. 
@@ -45,32 +45,32 @@ The following metrics should be collected / monitored * Enable [embedded exporter](https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings/#server_configuration_parameters-prometheus) * Grafana dashboards [https://grafana.com/grafana/dashboards/14192](https://grafana.com/grafana/dashboards/14192) or [https://grafana.com/grafana/dashboards/13500](https://grafana.com/grafana/dashboards/13500) -### Prometheus (embedded http handler with clickhouse-operator style metrics) + Grafana +### Prometheus (embedded http handler with Altinity Kubernetes Operator for ClickHouse style metrics) + Grafana * Enable [http handler](../monitoring-operator-exporter-compatibility/) -* Useful, if you want to use clickhouse-operator dashboard, but do not run ClickHouse in k8s. +* Useful, if you want to use the dashboard from the Altinity Kubernetes Operator for ClickHouse, but do not run ClickHouse in k8s. -### Prometheus (clickhouse-operator embedded exporter) + Grafana +### Prometheus (embedded exporter in the Altinity Kubernetes Operator for ClickHouse) + Grafana -* exporter is included in clickhouse-operator, and enabled automatically +* exporter is included in the Altinity Kubernetes Operator for ClickHouse, and enabled automatically * see instructions of [Prometheus](https://github.com/Altinity/clickhouse-operator/blob/eb3fc4e28514d0d6ea25a40698205b02949bcf9d/docs/prometheus_setup.md) and [Grafana](https://github.com/Altinity/clickhouse-operator/blob/eb3fc4e28514d0d6ea25a40698205b02949bcf9d/docs/grafana_setup.md) installation (if you don't have one) * Grafana dashboard [https://github.com/Altinity/clickhouse-operator/tree/master/grafana-dashboard](https://github.com/Altinity/clickhouse-operator/tree/master/grafana-dashboard) * Prometheus alerts 
[https://github.com/Altinity/clickhouse-operator/blob/master/deploy/prometheus/prometheus-alert-rules-clickhouse.yaml](https://github.com/Altinity/clickhouse-operator/blob/master/deploy/prometheus/prometheus-alert-rules-clickhouse.yaml) -### Prometheus (clickhouse external exporter) + Grafana +### Prometheus (ClickHouse external exporter) + Grafana * [clickhouse-exporter](https://github.com/ClickHouse/clickhouse_exporter) * Dashboard: https://grafana.com/grafana/dashboards/882 (unmaintained) -### Dashboards quering clickhouse directly via vertamedia / Altinity plugin +### Dashboards querying ClickHouse directly via vertamedia / Altinity plugin * Overview: [https://grafana.com/grafana/dashboards/13606](https://grafana.com/grafana/dashboards/13606) * Queries dashboard (analyzing system.query_log) https://grafana.com/grafana/dashboards/2515 -## Dashboard quering clickhouse directly via Grafana plugin +## Dashboard querying ClickHouse directly via Grafana plugin * https://grafana.com/blog/2022/05/05/introducing-the-official-clickhouse-plugin-for-grafana/ diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-moving-table-to-another-device..md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-moving-table-to-another-device..md index 8064fc754e..9303dccce4 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-moving-table-to-another-device..md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-moving-table-to-another-device..md @@ -1,12 +1,12 @@ --- -title: "Moving a table to another device." -linkTitle: "Moving a table to another device." +title: "Moving a table to another device" +linkTitle: "Moving a table to another device" description: > Moving a table to another device. --- Suppose we mount a new device at path `/mnt/disk_1` and want to move `table_4` to it. -1. Create directory on new device for ClickHouse data. /in shell `mkdir /mnt/disk_1/clickhouse` +1. Create directory on new device for ClickHouse® data. 
/in shell `mkdir /mnt/disk_1/clickhouse` 2. Change ownership of created directory to ClickHouse user. /in shell `chown -R clickhouse:clickhouse /mnt/disk_1/clickhouse` 3. Create a special storage policy which should include both disks: old and new. /in shell diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/aws-s3-recipes.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/aws-s3-recipes.md index 2f2ee6db3c..2af67b7832 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/aws-s3-recipes.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/aws-s3-recipes.md @@ -31,7 +31,7 @@ Role shall contain a policy with permissions like: } ``` -Corresponding configuration of ClickHouse: +Corresponding configuration of ClickHouse®: ```xml @@ -65,8 +65,9 @@ SELECT * FROM table_s3; DROP TABLE table_s3; ``` -## How to use AWS IRSA and IAM in Altinity Kubernetes clickhouse-operator to allow S3 backup without Explicit credentials -Install clickhouse-operator https://github.com/Altinity/clickhouse-operator/tree/master/docs/operator_installation_details.md +## How to use AWS IRSA and IAM in the Altinity Kubernetes Operator for ClickHouse to allow S3 backup without Explicit credentials + +Install `clickhouse-operator` https://github.com/Altinity/clickhouse-operator/tree/master/docs/operator_installation_details.md Create Role and IAM Policy, look details in https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/setting-up-enable-IAM.html diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_and_mutations.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_and_mutations.md index 1c5c32500e..db306828b6 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_and_mutations.md +++ 
b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_and_mutations.md @@ -3,7 +3,7 @@ title: "How much data are written to S3 during mutations" linkTitle: "s3 and mutations" weight: 100 description: >- - Example of how much data ClickHouse reads and writes to s3 during mutations. + Example of how much data ClickHouse® reads and writes to s3 during mutations. --- ## Configuration diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_cache_example.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_cache_example.md index cf834ac01d..e7db2700cc 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_cache_example.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3_cache_example.md @@ -249,7 +249,7 @@ alter table mydata move partition id '202301' to volume 's3cached'; └───────────┴───────────┴───────────┴────────────┴────────────┘ ``` -## S3 and ClickHouse start time +## S3 and ClickHouse® start time Let's create a table with 1000 parts and move them to s3. ```sql diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3disk.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3disk.md index c267ead8a1..38168d365f 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3disk.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-s3-object-storage/s3disk.md @@ -27,7 +27,7 @@ description: >- * skip_access_check — if true, it's possible to use read only credentials with regular MergeTree table. But you would need to disable merges (`prefer_not_to_merge` setting) on s3 volume as well. -* send_metadata — if true, ClickHouse will populate s3 object with initial part & file path, which allow you to recover metadata from s3 and make debug easier. 
+* send_metadata — if true, ClickHouse® will populate s3 object with initial part & file path, which allow you to recover metadata from s3 and make debug easier. ## Restore metadata from S3 diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-server-config-files.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-server-config-files.md index edc07bbb5b..37a87a33be 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-server-config-files.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-server-config-files.md @@ -2,12 +2,12 @@ title: "Server config files" linkTitle: "Server config files" description: > - How to manage server config files in ClickHouse + How to manage server config files in ClickHouse® --- ## Сonfig management (recommended structure) -ClickHouse server config consists of two parts server settings (config.xml) and users settings (users.xml). +ClickHouse® server config consists of two parts server settings (config.xml) and users settings (users.xml). By default they are stored in the folder **/etc/clickhouse-server/** in two files config.xml & users.xml. @@ -324,4 +324,4 @@ You can verify that your changes are valid by checking **/var/lib/clickhouse/pre If something wrong with with your settings e.g. unclosed XML element or typo you can see alerts about this mistakes in **/var/log/clickhouse-server/clickhouse-server.log** -If you see your changes in **preprocessed_configs** it does not mean that changes are applied on running server, check [Settings & restart](altinity-kb-server-config-files.md#Settings-%26--restart) +If you see your changes in **preprocessed_configs** it does not mean that changes are applied on running server, check Settings and restart. 
\ No newline at end of file diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-settings-to-adjust.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-settings-to-adjust.md index eef6b585cf..243762e137 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-settings-to-adjust.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-settings-to-adjust.md @@ -38,7 +38,7 @@ description: > ``` -2. If you have a good monitoring outside ClickHouse you don't need to store the history of metrics in ClickHouse +2. If you have a good monitoring outside ClickHouse® you don't need to store the history of metrics in ClickHouse ```markup cat /etc/clickhouse-server/config.d/disable_metric_logs.xml diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/_index.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/_index.md index 02d001c04f..39643eab13 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/_index.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/_index.md @@ -17,7 +17,7 @@ TLDR version: 6) [monitor zookeeper](https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/zookeeper-monitoring/). Side note: -in many cases, the slowness of the zookeeper is actually a symptom of some issue with clickhouse schema/usage pattern (the most typical issues: an enormous number of partitions/tables/databases with real-time inserts, tiny & frequent inserts). +in many cases, the slowness of the zookeeper is actually a symptom of some issue with ClickHouse® schema/usage pattern (the most typical issues: an enormous number of partitions/tables/databases with real-time inserts, tiny & frequent inserts). 
### How to install diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/altinity-kb-proper-setup.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/altinity-kb-proper-setup.md index 6644f215a9..df65037097 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/altinity-kb-proper-setup.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/altinity-kb-proper-setup.md @@ -20,7 +20,7 @@ TLDR version: 6) monitor zookeeper. Side note: -in many cases, the slowness of the zookeeper is actually a symptom of some issue with clickhouse schema/usage pattern (the most typical issues: an enormous number of partitions/tables/databases with real-time inserts, tiny & frequent inserts). +in many cases, the slowness of the zookeeper is actually a symptom of some issue with ClickHouse® schema/usage pattern (the most typical issues: an enormous number of partitions/tables/databases with real-time inserts, tiny & frequent inserts). Some doc about that subject: diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/altinity-kb-recovering-from-complete-metadata-loss-in-zookeeper.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/altinity-kb-recovering-from-complete-metadata-loss-in-zookeeper.md index de64acd474..8f7507d9c7 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/altinity-kb-recovering-from-complete-metadata-loss-in-zookeeper.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/altinity-kb-recovering-from-complete-metadata-loss-in-zookeeper.md @@ -6,7 +6,7 @@ description: > --- ## Problem -Every ClickHouse user experienced a loss of ZooKeeper one day. While the data is available and replicas respond to queries, inserts are no longer possible. 
ClickHouse uses ZooKeeper in order to store the reference version of the table structure and part of data, and when it is not available can not guarantee data consistency anymore. Replicated tables turn to the read-only mode. In this article we describe step-by-step instructions of how to restore ZooKeeper metadata and bring ClickHouse cluster back to normal operation. +Every ClickHouse® user experienced a loss of ZooKeeper one day. While the data is available and replicas respond to queries, inserts are no longer possible. ClickHouse uses ZooKeeper in order to store the reference version of the table structure and part of data, and when it is not available can not guarantee data consistency anymore. Replicated tables turn to the read-only mode. In this article we describe step-by-step instructions of how to restore ZooKeeper metadata and bring ClickHouse cluster back to normal operation. In order to restore ZooKeeper we have to solve two tasks. First, we need to restore table metadata in ZooKeeper. Currently, the only way to do it is to recreate the table with the `CREATE TABLE DDL` statement. @@ -14,7 +14,7 @@ In order to restore ZooKeeper we have to solve two tasks. First, we need to rest CREATE TABLE table_name ... ENGINE=ReplicatedMergeTree('zookeeper_path','replica_name'); ``` -The second and more difficult task is to populate zookeeper with information of clickhouse data parts. As mentioned above, ClickHouse stores the reference data about all parts of replicated tables in ZooKeeper, so we have to traverse all partitions and re-attach them to the recovered replicated table in order to fix that. +The second and more difficult task is to populate zookeeper with information of ClickHouse data parts. As mentioned above, ClickHouse stores the reference data about all parts of replicated tables in ZooKeeper, so we have to traverse all partitions and re-attach them to the recovered replicated table in order to fix that. 
{{% alert title="Info" color="info" %}} Starting from ClickHouse version 21.7 there is SYSTEM RESTORE REPLICA command @@ -52,7 +52,7 @@ Now let’s remove metadata in zookeeper using `ZkCli.sh` at ZooKeeper host: deleteall /clickhouse/cluster_1/tables/01/table_repl ``` -And try to resync clickhouse replica state with zookeeper: +And try to resync ClickHouse replica state with zookeeper: ```sql SYSTEM RESTART REPLICA table_repl; diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/altinity-kb-zookeeper-backup.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/altinity-kb-zookeeper-backup.md index 3ee8f824ef..bd07907aca 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/altinity-kb-zookeeper-backup.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/altinity-kb-zookeeper-backup.md @@ -5,7 +5,7 @@ description: > ZooKeeper backup --- -Question: Do I need to backup Zookeeper Database, because it’s pretty important for ClickHouse? +Question: Do I need to backup Zookeeper Database, because it’s pretty important for ClickHouse®? TLDR answer: **NO, just backup ClickHouse data itself, and do SYSTEM RESTORE REPLICA during recovery to recreate zookeeper data** @@ -13,9 +13,9 @@ Details: Zookeeper does not store any data, it stores the STATE of the distributed system ("that replica have those parts", "still need 2 merges to do", "alter is being applied" etc). That state always changes, and you can not capture / backup / and recover that state in a safe manner. So even backup from few seconds ago is represending some 'old state from the past' which is INCONSISTENT with actual state of the data. -In other words - if clickhouse is working - then the state of distributed system always changes, and it's almost impossible to collect the current state of zookeeper (while you collecting it it will change many times). The only exception is 'stop-the-world' scenario - i.e. 
shutdown all clickhouse nodes, with all other zookeeper clients, then shutdown all the zookeeper, and only then take the backups, in that scenario and backups of zookeeper & clickhouse will be consistent. In that case restoring the backup is as simple (and is equal to) as starting all the nodes which was stopped before. But usually that scenario is very non-practical because it requires huge downtime. +In other words - if ClickHouse is working - then the state of distributed system always changes, and it's almost impossible to collect the current state of zookeeper (while you collecting it it will change many times). The only exception is 'stop-the-world' scenario - i.e. shutdown all ClickHouse nodes, with all other zookeeper clients, then shutdown all the zookeeper, and only then take the backups, in that scenario and backups of zookeeper & ClickHouse will be consistent. In that case restoring the backup is as simple (and is equal to) as starting all the nodes which was stopped before. But usually that scenario is very non-practical because it requires huge downtime. -So what to do instead? It's enought if you will backup clickhouse data itself, and to recover the state of zookeeper you can just run the command `SYSTEM RESTORE REPLICA` command **AFTER** restoring the clickhouse data itself. That will recreate the state of the replica in the zookeeper as it exists on the filesystem after backup recovery. +So what to do instead? It's enought if you will backup ClickHouse data itself, and to recover the state of zookeeper you can just run the command `SYSTEM RESTORE REPLICA` command **AFTER** restoring the ClickHouse data itself. That will recreate the state of the replica in the zookeeper as it exists on the filesystem after backup recovery. Normally Zookeeper ensemble consists of 3 nodes, which is enough to survive hardware failures. 
diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/clickhouse-keeper-service.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/clickhouse-keeper-service.md index cedf3d6e0f..aec2656f37 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/clickhouse-keeper-service.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/clickhouse-keeper-service.md @@ -11,7 +11,7 @@ description: >- ### installation Need to install `clickhouse-common-static` + `clickhouse-keeper` OR `clickhouse-common-static` + `clickhouse-server`. -Both OK, use the first if you don't need clickhouse server locally. +Both OK, use the first if you don't need ClickHouse® server locally. ```bash dpkg -i clickhouse-common-static_{%version}.deb clickhouse-keeper_{%version}.deb diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/clickhouse-keeper.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/clickhouse-keeper.md index 64576cebb6..2bd0936541 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/clickhouse-keeper.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/clickhouse-keeper.md @@ -15,7 +15,7 @@ See slides: https://presentations.clickhouse.com/meetup54/keeper.pdf and video Since version 23.3 we recommend using clickhouse-keeper for new installations. -Even better if you will use the latest version of clickhouse-keeper (currently it's 23.7), and it's not necessary to use the same version of clickhouse-keeper as clickhouse itself. +Even better if you will use the latest version of clickhouse-keeper (currently it's 23.7), and it's not necessary to use the same version of clickhouse-keeper as ClickHouse® itself. For existing systems that currently use Apache Zookeeper, you can consider upgrading to clickhouse-keeper especially if you will upgrade clickhouse also. 
@@ -41,7 +41,7 @@ To test that you need to run 3 instances of clickhouse-server (which will mimic or event single instance with config like that: [https://github.com/ClickHouse/ClickHouse/blob/master/tests/config/config.d/keeper_port.xml](https://github.com/ClickHouse/ClickHouse/blob/master/tests/config/config.d/keeper_port.xml) [https://github.com/ClickHouse/ClickHouse/blob/master/tests/config/config.d/zookeeper.xml](https://github.com/ClickHouse/ClickHouse/blob/master/tests/config/config.d/zookeeper.xml) -And point all the clickhouses (zookeeper config secton) to those nodes / ports. +And point all the ClickHouses (zookeeper config secton) to those nodes / ports. Latests version is recommended (even testing / master builds). We will be thankful for any feedback. diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/install_ubuntu.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/install_ubuntu.md index 15a6772cc1..abf0e1d579 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/install_ubuntu.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/install_ubuntu.md @@ -1,9 +1,9 @@ --- -title: "Install standalone Zookeeper for ClickHouse on Ubuntu / Debian" +title: "Install standalone Zookeeper for ClickHouse® on Ubuntu / Debian" linkTitle: "Zookeeper install on Ubuntu" weight: 100 description: >- - Install standalone Zookeeper for ClickHouse on Ubuntu / Debian. + Install standalone Zookeeper for ClickHouse® on Ubuntu / Debian. 
--- ## Reference script to install standalone Zookeeper for Ubuntu / Debian diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/jvm-sizes-and-garbage-collector-settings.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/jvm-sizes-and-garbage-collector-settings.md index 324d712953..a2ede2233e 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/jvm-sizes-and-garbage-collector-settings.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/jvm-sizes-and-garbage-collector-settings.md @@ -36,7 +36,7 @@ Set the Java heap size smaller than available RAM size on the node. This is very 1. `MaxGCPauseMillis=50` (by default 200) - the 'target' acceptable pause for garbage collection (milliseconds) -1. `jute.maxbuffer` limits the maximum size of znode content. By default it's 1Mb. In some usecases (lot of partitions in table) ClickHouse may need to create bigger znodes. +1. `jute.maxbuffer` limits the maximum size of znode content. By default it's 1Mb. In some usecases (lot of partitions in table) ClickHouse® may need to create bigger znodes. 1. (optional) enable GC logs: `-Xloggc:/path_to/gc.log` @@ -44,7 +44,7 @@ Set the Java heap size smaller than available RAM size on the node. 
This is very ## Zookeeper configurarion used by Yandex Metrika (from 2017) -The configuration used by Yandex ( [https://clickhouse.tech/docs/en/operations/tips/\#zookeeper](https://clickhouse.tech/docs/en/operations/tips/#zookeeper) ) - they use older JVM version (with `UseParNewGC` garbage collector), and tune GC logs heavily: +The configuration used by Yandex ( [https://clickhouse.com/docs/en/operations/tips#zookeeper](https://clickhouse.com/docs/en/operations/tips#zookeeper) ) - they use older JVM version (with `UseParNewGC` garbage collector), and tune GC logs heavily: ```bash JAVA_OPTS="-Xms{{ cluster.get('xms','128M') }} \ diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/zookeeper-cluster-migration-k8s-node-storage.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/zookeeper-cluster-migration-k8s-node-storage.md index 9f51f8c271..eb88c07037 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/zookeeper-cluster-migration-k8s-node-storage.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/zookeeper-cluster-migration-k8s-node-storage.md @@ -34,7 +34,7 @@ performed gracefully so that quorum is maintained during the whole operation. 1. Ensure new member joined and synced 1. (run on leader) `echo mntr | nc 127.0.0.1 2181 | grep zk_synced_followers` should be N-1 for N member cluster 1. Repeat for all other non-leader pods -1. (ClickHouse Keeper only), for Zookeeper you will need to force an election by stopping the leader +1. (ClickHouse® Keeper only), for Zookeeper you will need to force an election by stopping the leader 1. Ask the current leader to yield leadership 2. `echo ydld | nc 127.0.0.1 2181` -> should print something like `Sent yield leadership request to ...` 3. 
* Make sure a different leader was elected by finding your new leader diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-packaging-compatibility-greater-than-21.x-and-earlier.md b/content/en/altinity-kb-setup-and-maintenance/altinity-packaging-compatibility-greater-than-21.x-and-earlier.md index a0573c2f3b..df53d2c7a4 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-packaging-compatibility-greater-than-21.x-and-earlier.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-packaging-compatibility-greater-than-21.x-and-earlier.md @@ -6,11 +6,11 @@ description: > --- ## Working with Altinity & Yandex packaging together -Since version 21.1 Altinity switches to the same packaging as used by Yandex. That is needed for syncing things and introduces several improvements (like adding systemd service file). +Since ClickHouse® version 21.1 Altinity switches to the same packaging as used by Yandex. That is needed for syncing things and introduces several improvements (like adding systemd service file). Unfortunately, that change leads to compatibility issues - automatic dependencies resolution gets confused by the conflicting package names: both when you update ClickHouse to the new version (the one which uses older packaging) and when you want to install older altinity packages (20.8 and older). 
-### Installing old clickhouse version (with old packaging schema) +### Installing old ClickHouse version (with old packaging schema) When you try to install versions 20.8 or older from Altinity repo - diff --git a/content/en/altinity-kb-setup-and-maintenance/asynchronous_metrics_descr.md b/content/en/altinity-kb-setup-and-maintenance/asynchronous_metrics_descr.md index 0c997b7e4b..f3d9cfd1c2 100644 --- a/content/en/altinity-kb-setup-and-maintenance/asynchronous_metrics_descr.md +++ b/content/en/altinity-kb-setup-and-maintenance/asynchronous_metrics_descr.md @@ -13,7 +13,7 @@ jemalloc -- parameters of jemalloc allocator, they are not very useful, and not MarkCacheBytes / MarkCacheFiles -- there are cache for .mrk files (default size is 5GB), you can see is it use all 5GB or not -MemoryCode -- how much memory allocated for ClickHouse executable +MemoryCode -- how much memory allocated for ClickHouse® executable MemoryDataAndStack -- virtual memory allocated for data and stack diff --git a/content/en/altinity-kb-setup-and-maintenance/aws-ec2-storage.md b/content/en/altinity-kb-setup-and-maintenance/aws-ec2-storage.md index ffd349002a..218c975f41 100644 --- a/content/en/altinity-kb-setup-and-maintenance/aws-ec2-storage.md +++ b/content/en/altinity-kb-setup-and-maintenance/aws-ec2-storage.md @@ -9,7 +9,7 @@ aliases: # EBS -Most native choose for ClickHouse as fast storage, because it usually guarantees best throughput, IOPS, latency for reasonable price. +Most native choose for ClickHouse® as fast storage, because it usually guarantees best throughput, IOPS, latency for reasonable price. 
[https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-optimized.html](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-optimized.html) @@ -18,7 +18,7 @@ Most native choose for ClickHouse as fast storage, because it usually guarantees ## General Purpose SSD volumes -In usual conditions ClickHouse being limited by throughput of volumes and amount of provided IOPS doesn't make any big difference for performance starting from a certain number. So the most native choice for clickhouse is gp3 and gp2 volumes. +In usual conditions ClickHouse being limited by throughput of volumes and amount of provided IOPS doesn't make any big difference for performance starting from a certain number. So the most native choice for ClickHouse is gp3 and gp2 volumes. ‌EC2 instances also have an EBS throughput limit, it depends on the size of the EC2 instance. That means if you would attach multiple volumes which would have high potential throughput, you would be limited by your EC2 instance, so usually there is no reason to have more than 1-3 GP3 volume or 4-5 GP2 volume per node. diff --git a/content/en/altinity-kb-setup-and-maintenance/cgroups_k8s.md b/content/en/altinity-kb-setup-and-maintenance/cgroups_k8s.md index c09f460402..83433582b3 100644 --- a/content/en/altinity-kb-setup-and-maintenance/cgroups_k8s.md +++ b/content/en/altinity-kb-setup-and-maintenance/cgroups_k8s.md @@ -6,9 +6,7 @@ description: >- cgroups and kubernetes cloud providers. --- -## cgroups and kubernetes cloud providers - -Why my ClickHouse is slow after upgrade to version 22.2 and higher? +Why my ClickHouse® is slow after upgrade to version 22.2 and higher? The probable reason is that ClickHouse 22.2 started to respect cgroups (Respect cgroups limits in max_threads autodetection. [#33342](https://github.com/ClickHouse/ClickHouse/pull/33342) ([JaySon](https://github.com/JaySon-Huang)). 
diff --git a/content/en/altinity-kb-setup-and-maintenance/change-me.md b/content/en/altinity-kb-setup-and-maintenance/change-me.md index afcfd4d2b6..2c53dbabe3 100644 --- a/content/en/altinity-kb-setup-and-maintenance/change-me.md +++ b/content/en/altinity-kb-setup-and-maintenance/change-me.md @@ -1,13 +1,13 @@ --- -title: "Replication: Can not resolve host of another clickhouse server" -linkTitle: "Replication: Can not resolve host of another clickhouse server" +title: "Replication: Can not resolve host of another ClickHouse® server" +linkTitle: "Replication: Can not resolve host of another ClickHouse® server" weight: 100 description: >- --- ### Symptom -When configuring Replication the ClickHouse cluster nodes are experiencing communication issues, and an error message appears in the log that states that the ClickHouse host cannot be resolved. +When configuring Replication the ClickHouse® cluster nodes are experiencing communication issues, and an error message appears in the log that states that the ClickHouse host cannot be resolved. ``` DNSResolver: Cannot resolve host (xxxxx), error 0: DNS error. diff --git a/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup-diff.md b/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup-diff.md index 7248dfe2fe..a254e86ecc 100644 --- a/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup-diff.md +++ b/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup-diff.md @@ -1,12 +1,12 @@ --- -title: "differential backups using clickhouse-backup" -linkTitle: "differential backups using clickhouse-backup" +title: "Differential backups using Altinity Backup for ClickHouse®" +linkTitle: "Differential backups using Altinity Backup for ClickHouse®" description: > - differential backups using clickhouse-backup + Differential backups using Altinity Backup for ClickHouse® --- -### differential backups using clickhouse-backup +### Differential backups using Altinity Backup for ClickHouse® -1. 
Download the latest clickhouse-backup for your platform https://github.com/AlexAkulov/clickhouse-backup/releases +1. Download [the latest release](https://github.com/Altinity/clickhouse-backup/releases) for your platform ```bash # ubuntu / debian diff --git a/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup.md b/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup.md index 74537344b6..3ca54c161b 100644 --- a/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup.md +++ b/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup.md @@ -1,12 +1,12 @@ --- -title: "clickhouse-backup" -linkTitle: "clickhouse-backup" +title: "Altinity Backup for ClickHouse®" +linkTitle: "Altinity Backup for ClickHouse®" description: > - clickhouse-backup + backblaze + Altinity Backup for ClickHouse® + backblaze --- ### Installation and configuration -Download the latest `clickhouse-backup.tar.gz` from assets from [https://github.com/AlexAkulov/clickhouse-backup/releases](https://github.com/AlexAkulov/clickhouse-backup/releases) +Download the latest `clickhouse-backup.tar.gz` from assets from [https://github.com/Altinity/clickhouse-backup/releases](https://github.com/Altinity/clickhouse-backup/releases) This tar.gz contains a single binary of `clickhouse-backup` and an example of config file. 
@@ -73,7 +73,7 @@ select count() from test.test; └─────────┘ ``` -clickhouse-backup list should work without errors (it scans local and remote (s3) folders): +`clickhouse-backup list` should work without errors (it scans local and remote (s3) folders): ```bash $ sudo ./clickhouse-backup list -c config.yml diff --git a/content/en/altinity-kb-setup-and-maintenance/clickhouse-deployment-plan.md b/content/en/altinity-kb-setup-and-maintenance/clickhouse-deployment-plan.md index 6e456a6db8..8c768f34fb 100644 --- a/content/en/altinity-kb-setup-and-maintenance/clickhouse-deployment-plan.md +++ b/content/en/altinity-kb-setup-and-maintenance/clickhouse-deployment-plan.md @@ -1,16 +1,16 @@ --- -title: "Successful ClickHouse deployment plan" -linkTitle: "Successful ClickHouse deployment plan" +title: "Successful ClickHouse® deployment plan" +linkTitle: "Successful ClickHouse® deployment plan" weight: 100 description: >- - Successful ClickHouse deployment plan. + Successful ClickHouse® deployment plan --- -## Successful ClickHouse deployment plan +## Successful ClickHouse® deployment plan ### Stage 0. Build POC -1. Install single node clickhouse +1. Install single node ClickHouse - https://clickhouse.com/docs/en/getting-started/tutorial/ - https://clickhouse.com/docs/en/getting-started/install/ - https://docs.altinity.com/altinitystablebuilds/stablequickstartguide/ @@ -40,14 +40,14 @@ description: >- ### Stage 2. Preprod setup & developement -1. Install clickhouse in cluster - several nodes / VMs + zookeeper +1. Install ClickHouse in cluster - several nodes / VMs + zookeeper - https://kb.altinity.com/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/cluster-configuration-process/ - https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/altinity-kb-proper-setup/ - https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/install_ubuntu/ 2. 
Create good config & automate config / os / restarts (ansible / puppet etc) - https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-settings-to-adjust/ - for docker: https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-clickhouse-in-docker/ - - for k8, use clickhouse-operator OR https://kb.altinity.com/altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s/ + - for k8s, use the Altinity Kubernetes Operator for ClickHouse OR https://kb.altinity.com/altinity-kb-kubernetes/altinity-kb-possible-issues-with-running-clickhouse-in-k8s/ 3. Set up monitoring / log processing / alerts etc. - https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-monitoring/#build-your-own-monitoring 4. Set up users. diff --git a/content/en/altinity-kb-setup-and-maintenance/clickhouse-operator.md b/content/en/altinity-kb-setup-and-maintenance/clickhouse-operator.md index e84894a7bf..6e50f01907 100644 --- a/content/en/altinity-kb-setup-and-maintenance/clickhouse-operator.md +++ b/content/en/altinity-kb-setup-and-maintenance/clickhouse-operator.md @@ -1,11 +1,11 @@ --- -title: "ClickHouse operator" -linkTitle: "ClickHouse operator" +title: "The Altinity Kubernetes Operator for ClickHouse®" +linkTitle: "The Altinity Kubernetes Operator for ClickHouse®" weight: 100 description: >- - ClickHouse operator + The Altinity Kubernetes Operator for ClickHouse® --- -## ClickHouse operator +## The Altinity Kubernetes Operator for ClickHouse® https://github.com/Altinity/clickhouse-operator/blob/master/docs/README.md diff --git a/content/en/altinity-kb-setup-and-maintenance/clickhouse-versions.md b/content/en/altinity-kb-setup-and-maintenance/clickhouse-versions.md index 5da7d8a9d0..4654857acd 100644 --- a/content/en/altinity-kb-setup-and-maintenance/clickhouse-versions.md +++ b/content/en/altinity-kb-setup-and-maintenance/clickhouse-versions.md @@ -1,10 +1,10 @@ --- -title: "ClickHouse versions" -linkTitle: "ClickHouse versions" 
+title: "ClickHouse® versions" +linkTitle: "ClickHouse® versions" description: > - ClickHouse versions + ClickHouse® versions --- -## ClickHouse versioning schema +## ClickHouse® versioning schema ![ClickHouse Version Breakdown](/assets/illyustraciya_bez_nazvaniya.png) @@ -44,7 +44,7 @@ See also: [https://clickhouse.tech/docs/en/faq/operations/production/](https://c ## How do I upgrade? -Follow this KB article for [clickhouse version upgrade](https://kb.altinity.com/upgrade/) +Follow this KB article for [ClickHouse version upgrade](https://kb.altinity.com/upgrade/) ## Bugs? @@ -52,11 +52,11 @@ ClickHouse development process goes in a very high pace and has already thousand All core features are well-tested, and very stable, and code is high-quality. But as with any other software bad things may happen. Usually the most of bugs happens in the new, freshly added functionality, and in some complex combination of several features (of course all possible combinations of features just physically can’t be tested). Usually new features are adopted by the community and stabilize quickly. -### What should I do if I found a bug in clickhouse? +### What should I do if I found a bug in ClickHouse? 1. First of all: try to upgrade to the latest bugfix release Example: if you use v21.3.5.42-lts but you know that v21.3.10.1-lts already exists - start with upgrade to that. Upgrades to latest maintenance releases are smooth and safe. 2. Look for similar issues in github. Maybe the fix is on the way. 3. If you can reproduce the bug: try to isolate it - remove some pieces of query one-by-one / simplify the scenario until the issue still reproduces. This way you can figure out which part is responsible for that bug, and you can try to create [minimal reproducible example](https://stackoverflow.com/help/minimal-reproducible-example) 4. Once you have minimal reproducible example: 1. report it to github (or to Altinity Support) - 2. check if it reproduces on newer clickhouse versions + 2. 
check if it reproduces on newer ClickHouse versions diff --git a/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/_index.md b/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/_index.md index 5aea201bf1..f6fb8c35ec 100644 --- a/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/_index.md +++ b/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/_index.md @@ -6,7 +6,7 @@ description: > --- -Moving from a single ClickHouse server to a clustered format provides several benefits: +Moving from a single ClickHouse® server to a clustered format provides several benefits: * Replication guarantees data integrity. * Provides redundancy. diff --git a/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/cluster-configuration-faq.md b/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/cluster-configuration-faq.md index 7b208e1d43..45b4a4b483 100644 --- a/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/cluster-configuration-faq.md +++ b/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/cluster-configuration-faq.md @@ -4,9 +4,9 @@ linkTitle: "Cluster Configuration FAQ" description: > Cluster Configuration FAQ --- -## ClickHouse does not start, some other unexpected behavior happening +## ClickHouse® does not start, some other unexpected behavior happening -Check clickhouse logs, they are your friends: +Check ClickHouse logs, they are your friends: tail -n 1000 /var/log/clickhouse-server/clickhouse-server.err.log \| less tail -n 10000 /var/log/clickhouse-server/clickhouse-server.log \| less @@ -17,9 +17,9 @@ See [our knowledge base article]({{ Cluster Configuration Process --- -So you set up 3 nodes with zookeeper (zookeeper1, zookeeper2, zookeeper3 - [How to install 
zookeer?](https://docs.altinity.com/operationsguide/clickhouse-zookeeper/)), and and 4 nodes with ClickHouse (clickhouse-sh1r1,clickhouse-sh1r2,clickhouse-sh2r1,clickhouse-sh2r2 - [how to install ClickHouse?](https://docs.altinity.com/altinitystablerelease/stablequickstartguide/)). Now we need to make them work together. +So you set up 3 nodes with zookeeper (zookeeper1, zookeeper2, zookeeper3 - [How to install zookeer?](https://docs.altinity.com/operationsguide/clickhouse-zookeeper/)), and and 4 nodes with ClickHouse® (clickhouse-sh1r1,clickhouse-sh1r2,clickhouse-sh2r1,clickhouse-sh2r2 - [how to install ClickHouse?](https://docs.altinity.com/altinitystablerelease/stablequickstartguide/)). Now we need to make them work together. Use ansible/puppet/salt or other systems to control the servers’ configurations. @@ -106,7 +106,7 @@ Engine=Distributed('{cluster}', 'default', ' #### **Hardening ClickHouse Security** -**See** [https://docs.altinity.com/operationsguide/security/clickhouse-hardening-guide/](https://docs.altinity.com/operationsguide/security/clickhouse-hardening-guide/) +**See** [https://docs.altinity.com/operationsguide/security/](https://docs.altinity.com/operationsguide/security/) ### Additional Settings diff --git a/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardening-clickhouse-security.md b/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardening-clickhouse-security.md index edd2e76778..8030bdec2b 100644 --- a/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardening-clickhouse-security.md +++ b/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardening-clickhouse-security.md @@ -6,7 +6,7 @@ description: > --- -ClickHouse is currently at the design stage of creating some universal backup solution. 
Some custom backup strategies are: +ClickHouse® is currently at the design stage of creating some universal backup solution. Some custom backup strategies are: 1. Each shard is backed up separately. 2. FREEZE the table/partition. For more information, see [Alter Freeze Partition](https://clickhouse.tech/docs/en/sql-reference/statements/alter/partition/#alter_freeze-partition). @@ -15,6 +15,6 @@ ClickHouse is currently at the design stage of creating some universal backup so 1. Cloud users are recommended to use [Rclone](https://rclone.org/). 4. Always add the full contents of the metadata subfolder that contains the current DB schema and clickhouse configs to your backup. 5. For a second replica, it’s enough to copy metadata and configuration. -6. Data in clickhouse is already compressed with lz4, backup can be compressed bit better, but avoid using cpu-heavy compression algorythms like gzip, use something like zstd instead. +6. Data in ClickHouse is already compressed with lz4, backup can be compressed bit better, but avoid using cpu-heavy compression algorythms like gzip, use something like zstd instead. -The tool automating that process [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup). +The [Altinity Backup for ClickHouse tool](https://github.com/Altinity/clickhouse-backup) automates that process. 
diff --git a/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardware-requirements.md b/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardware-requirements.md index d99785c776..48ffaf38c9 100644 --- a/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardware-requirements.md +++ b/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardware-requirements.md @@ -4,7 +4,7 @@ linkTitle: "Hardware Requirements" description: > Hardware Requirements --- -### **ClickHouse** +### ClickHouse® ClickHouse will use all available hardware to maximize performance. So the more hardware - the better. As of this publication, the hardware requirements are: @@ -16,7 +16,7 @@ ClickHouse will use all available hardware to maximize performance. So the more For clouds: disk throughput is the more important factor compared to IOPS. Be aware of burst / baseline disk speed difference. -See also: [https://clickhouse.tech/benchmark/hardware/](https://clickhouse.tech/benchmark/hardware/) +See also: [https://benchmark.clickhouse.com/hardware/](https://benchmark.clickhouse.com/hardware/) ### **Zookeeper** diff --git a/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/network-configuration.md b/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/network-configuration.md index 63cf15cd37..ba28e295eb 100644 --- a/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/network-configuration.md +++ b/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/network-configuration.md @@ -8,7 +8,7 @@ description: > ### **Networking And Server Room Planning** -The network used for your ClickHouse cluster should be a fast network, ideally 10 Gbit or more. +The network used for your ClickHouse® cluster should be a fast network, ideally 10 Gbit or more. 
ClickHouse nodes generate a lot of traffic to exchange the data between nodes (port 9009 for replication, and 9000 for distributed queries). Zookeeper traffic in normal circumstanses is moderate, but in some special cases can also be very significant. diff --git a/content/en/altinity-kb-setup-and-maintenance/connection-problems.md b/content/en/altinity-kb-setup-and-maintenance/connection-problems.md index a4d6116c90..3ab146e90d 100644 --- a/content/en/altinity-kb-setup-and-maintenance/connection-problems.md +++ b/content/en/altinity-kb-setup-and-maintenance/connection-problems.md @@ -1,17 +1,17 @@ --- -title: "Can not connect to my ClickHouse server" -linkTitle: "Can not connect to my ClickHouse server" +title: "Can not connect to my ClickHouse® server" +linkTitle: "Can not connect to my ClickHouse® server" weight: 100 description: >- - Can not connect to my ClickHouse server. + Can not connect to my ClickHouse® server. --- -## Can not connect to my ClickHouse server +## Can not connect to my ClickHouse® server Errors like "Connection reset by peer, while reading from socket" -1. Ensure that the clickhouse-server is running +1. Ensure that the `clickhouse-server` is running ```sh systemctl status clickhouse-server @@ -28,15 +28,15 @@ Errors like Ensure you're not trying to connect to secure port without tls / https or vice versa. - For clickhouse-client - pay attention on host / port / secure flags. + For `clickhouse-client` - pay attention on host / port / secure flags. - Ensure the interface you're connecting to is the one which clickhouse listens (by default clickhouse listens only localhost). + Ensure the interface you're connecting to is the one which ClickHouse listens (by default ClickHouse listens only localhost). - Note: If you uncomment line `0.0.0.0` only - clickhouse will listen only ipv4 interfaces, - while the localhost (used by clickhouse-client) may be resolved to ipv6 address. And clickhouse-client may be failing to connect. 
+ Note: If you uncomment line `0.0.0.0` only - ClickHouse will listen only ipv4 interfaces, + while the localhost (used by `clickhouse-client`) may be resolved to ipv6 address. And `clickhouse-client` may be failing to connect. - How to check which interfaces / ports do clickhouse listen? + How to check which interfaces / ports do ClickHouse listen? ```sh sudo lsof -i -P -n | grep LISTEN @@ -59,14 +59,14 @@ Errors like 4. Check for errors in /var/log/clickhouse-server/clickhouse-server.err.log ? -5. Is clickhouse able to serve some trivial tcp / http requests from localhost? +5. Is ClickHouse able to serve some trivial tcp / http requests from localhost? ```sh curl 127.0.0.1:9200 curl 127.0.0.1:8123 ``` -6. Check number of sockets opened by clickhouse +6. Check number of sockets opened by ClickHouse ```sh sudo lsof -i -a -p $(pidof clickhouse-server) diff --git a/content/en/altinity-kb-setup-and-maintenance/custom_settings.md b/content/en/altinity-kb-setup-and-maintenance/custom_settings.md index 46c0ef2613..89d42037f2 100644 --- a/content/en/altinity-kb-setup-and-maintenance/custom_settings.md +++ b/content/en/altinity-kb-setup-and-maintenance/custom_settings.md @@ -8,7 +8,7 @@ description: >- ## Using custom settings in config -You can not use the custom settings in config file 'as is', because clickhouse don't know which datatype should be used to parse it. +You can not use the custom settings in config file 'as is', because ClickHouse® don't know which datatype should be used to parse it. 
```xml cat /etc/clickhouse-server/users.d/default_profile.xml diff --git a/content/en/altinity-kb-setup-and-maintenance/disk_encryption.md b/content/en/altinity-kb-setup-and-maintenance/disk_encryption.md index 73d3a79cf4..87d0d554dc 100644 --- a/content/en/altinity-kb-setup-and-maintenance/disk_encryption.md +++ b/content/en/altinity-kb-setup-and-maintenance/disk_encryption.md @@ -1,5 +1,5 @@ --- -title: "ClickHouse data/disk encryption (at rest)" +title: "ClickHouse® data/disk encryption (at rest)" linkTitle: "disk encryption" weight: 100 description: >- diff --git a/content/en/altinity-kb-setup-and-maintenance/filesystems.md b/content/en/altinity-kb-setup-and-maintenance/filesystems.md index 8e030ead5c..c33e44b876 100644 --- a/content/en/altinity-kb-setup-and-maintenance/filesystems.md +++ b/content/en/altinity-kb-setup-and-maintenance/filesystems.md @@ -1,18 +1,18 @@ --- -title: "ClickHouse and different filesystems" -linkTitle: "ClickHouse and different filesystems" +title: "ClickHouse® and different filesystems" +linkTitle: "ClickHouse® and different filesystems" weight: 100 description: >- - ClickHouse and different filesystems. + ClickHouse® and different filesystems. --- -In general ClickHouse should work with any POSIX-compatible filesystem. +In general ClickHouse® should work with any POSIX-compatible filesystem. * hard links and soft links support is mandatory. -* clickhouse can use O_DIRECT mode to bypass the cache (and async io) -* clickhouse can use renameat2 command for some atomic operations (not all the filesystems support that). +* ClickHouse can use O_DIRECT mode to bypass the cache (and async io) +* ClickHouse can use renameat2 command for some atomic operations (not all the filesystems support that). * depending on the schema and details of the usage the filesystem load can vary between the setup. The most natural load - is high throughput, with low or moderate IOPS. 
-* data is compressed in clickhouse (LZ4 by default), while indexes / marks / metadata files - no. Enabling disk-level compression can sometimes improve the compression, but can affect read / write speed. +* data is compressed in ClickHouse (LZ4 by default), while indexes / marks / metadata files - no. Enabling disk-level compression can sometimes improve the compression, but can affect read / write speed. ### ext4 @@ -31,7 +31,7 @@ We don't have real proofs/benchmarks though, example reports: others and they found that they accidentally set up those servers with XFS instead of Ext4. * in the system journal you can sometimes see reports like 'task XYZ blocked for more than 120 seconds' and stack trace pointing to XFS code (example: https://gist.github.com/filimonov/85b894268f978c2ccc18ea69bae5adbd ) * system goes to 99% io kernel under load sometimes. -* we have XFS, sometimes clickhouse goes to "sleep" because XFS daemon is doing smth unknown +* we have XFS, sometimes ClickHouse goes to "sleep" because XFS daemon is doing smth unknown Maybe the above problem can be workaround by some tuning/settings, but so far we do not have a working and confirmed way to do this. @@ -51,7 +51,7 @@ Tuning: **important note**: ZFS does not support the `renameat2` command, which is used by the Atomic database engine, and therefore some of the Atomic functionality will not be available. -In old versions of clickhouse, you can face issues with the O_DIRECT mode. +In old versions of ClickHouse, you can face issues with the O_DIRECT mode. Also there is a well-known (and controversional) Linus Torvalds opinion: "Don't Use ZFS on Linux" [[1]](https://www.realworldtech.com/forum/?threadid=189711&curpostid=189841), [[2]](https://arstechnica.com/gadgets/2020/01/linus-torvalds-zfs-statements-arent-right-heres-the-straight-dope/), [[3]](https://arstechnica.com/gadgets/2020/01/linus-torvalds-zfs-statements-arent-right-heres-the-straight-dope/). @@ -68,7 +68,7 @@ Not enough information. 
There are reports that some people successfully use it in their setups. A fast network is required. -There were some reports about data damage on the disks on older clickhouse versions, which could be caused by the issues with O_DIRECT or [async io support](https://lustre-discuss.lustre.narkive.com/zwcvyEEY/asynchronous-posix-i-o-with-lustre) on Lustre. +There were some reports about data damage on the disks on older ClickHouse versions, which could be caused by the issues with O_DIRECT or [async io support](https://lustre-discuss.lustre.narkive.com/zwcvyEEY/asynchronous-posix-i-o-with-lustre) on Lustre. ### NFS (and EFS) diff --git a/content/en/altinity-kb-setup-and-maintenance/high-cpu-usage.md b/content/en/altinity-kb-setup-and-maintenance/high-cpu-usage.md index 0d4db8f55a..222049777a 100644 --- a/content/en/altinity-kb-setup-and-maintenance/high-cpu-usage.md +++ b/content/en/altinity-kb-setup-and-maintenance/high-cpu-usage.md @@ -4,7 +4,7 @@ linkTitle: "High CPU usage" description: > High CPU usage --- -In general, it is a NORMAL situation for clickhouse that while processing a huge dataset it can use a lot of (or all of) the server resources. It is 'by design' - just to make the answers faster. +In general, it is a NORMAL situation for ClickHouse® that while processing a huge dataset it can use a lot of (or all of) the server resources. It is 'by design' - just to make the answers faster. The main directions to reduce the CPU usage **is to review the schema / queries** to limit the amount of the data which need to be processed, and to plan the resources in a way when single running query will not impact the others. 
diff --git a/content/en/altinity-kb-setup-and-maintenance/load-balancers.md b/content/en/altinity-kb-setup-and-maintenance/load-balancers.md index 05f4f67120..8274175b44 100644 --- a/content/en/altinity-kb-setup-and-maintenance/load-balancers.md +++ b/content/en/altinity-kb-setup-and-maintenance/load-balancers.md @@ -6,27 +6,27 @@ description: > --- In general - one of the simplest option to do load balancing is to implement it on the client side. -I.e. list several endpoints for clickhouse connections and add some logic to pick one of the nodes. +I.e. list several endpoints for ClickHouse® connections and add some logic to pick one of the nodes. Many client libraries support that. ## ClickHouse native protocol (port 9000) -Currently there are no protocol-aware proxies for clickhouse protocol, so the proxy / load balancer can work only on TCP level. +Currently there are no protocol-aware proxies for ClickHouse protocol, so the proxy / load balancer can work only on TCP level. One of the best option for TCP load balancer is haproxy, also nginx can work in that mode. Haproxy will pick one upstream when connection is established, and after that it will keep it connected to the same server until the client or server will disconnect (or some timeout will happen). -It can’t send different queries coming via a single connection to different servers, as he knows nothing about clickhouse protocol and doesn't know when one query ends and another start, it just sees the binary stream. +It can’t send different queries coming via a single connection to different servers, as he knows nothing about ClickHouse protocol and doesn't know when one query ends and another start, it just sees the binary stream. So for native protocol, there are only 3 possibilities: 1) close connection after each query client-side 2) close connection after each query server-side (currently there is only one setting for that - idle_connection_timeout=0, which is not exact what you need, but similar). 
-3) use a clickhouse server with Distributed table as a proxy. +3) use a ClickHouse server with Distributed table as a proxy. ## HTTP protocol (port 8123) There are many more options and you can use haproxy / nginx / chproxy, etc. -chproxy give some extra clickhouse-specific features, you can find a list of them at [https://chproxy.org](https://chproxy.org) +chproxy gives some extra ClickHouse-specific features, you can find a list of them at [https://chproxy.org](https://chproxy.org) diff --git a/content/en/altinity-kb-setup-and-maintenance/logging.md b/content/en/altinity-kb-setup-and-maintenance/logging.md index f262bdad98..ec8a481e6a 100644 --- a/content/en/altinity-kb-setup-and-maintenance/logging.md +++ b/content/en/altinity-kb-setup-and-maintenance/logging.md @@ -30,6 +30,6 @@ df -Th df -Thi ``` -Q. How to configure logging in clickhouse? +Q. How to configure logging in ClickHouse®? A. See [https://github.com/ClickHouse/ClickHouse/blob/ceaf6d57b7f00e1925b85754298cf958a278289a/programs/server/config.xml\#L9-L62](https://github.com/ClickHouse/ClickHouse/blob/ceaf6d57b7f00e1925b85754298cf958a278289a/programs/server/config.xml#L9-L62) diff --git a/content/en/altinity-kb-setup-and-maintenance/monitoring-operator-exporter-compatibility.md b/content/en/altinity-kb-setup-and-maintenance/monitoring-operator-exporter-compatibility.md index 18c5bc1530..54d0549708 100644 --- a/content/en/altinity-kb-setup-and-maintenance/monitoring-operator-exporter-compatibility.md +++ b/content/en/altinity-kb-setup-and-maintenance/monitoring-operator-exporter-compatibility.md @@ -1,13 +1,12 @@ --- -title: "Compatibility layer for clickhouse-operator metric exporter" -linkTitle: "clickhouse-operator metrics names from clickhouse-server" +title: "Compatibility layer for the Altinity Kubernetes Operator for ClickHouse®" +linkTitle: "Compatibility layer for the Altinity Kubernetes Operator for ClickHouse®" weight: 100 description: >- Page description for heading and indexes.
--- -It's possible to expose clickhouse-server metrics in clickhouse-operator style. -It's for clickhouse-operator grafana dashboard. +It's possible to expose `clickhouse-server` metrics in the style used by the Altinity Kubernetes Operator for ClickHouse®. It's for the `clickhouse-operator` grafana dashboard. ```sql CREATE VIEW system.operator_compatible_metrics diff --git a/content/en/altinity-kb-setup-and-maintenance/rbac.md b/content/en/altinity-kb-setup-and-maintenance/rbac.md index f98e717757..131204638d 100644 --- a/content/en/altinity-kb-setup-and-maintenance/rbac.md +++ b/content/en/altinity-kb-setup-and-maintenance/rbac.md @@ -1,6 +1,6 @@ --- -title: "Access Control and Account Management (RBAC)" -linkTitle: "RBAC example" +title: "ClickHouse® Access Control and Account Management (RBAC)" +linkTitle: "ClickHouse® RBAC example" weight: 100 description: >- Access Control and Account Management (RBAC). @@ -8,7 +8,7 @@ description: >- Documentation https://clickhouse.com/docs/en/operations/access-rights/ -## Enable RBAC and create admin user +## Enable ClickHouse® RBAC and create admin user Create an ```admin``` user like (root in MySQL or postgres in PostgreSQL) to do the DBA/admin ops in the `user.xml` file and [set the access management property for the admin user](https://clickhouse.com/docs/en/operations/access-rights/#enabling-access-control) diff --git a/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/_index.md b/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/_index.md index 35b95f7dba..dcf4d13fb7 100644 --- a/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/_index.md +++ b/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/_index.md @@ -1,8 +1,8 @@ --- -title: "Schema migration tools for ClickHouse" -linkTitle: "Schema migration tools for ClickHouse" +title: "Schema migration tools for ClickHouse®" +linkTitle: "Schema migration tools for ClickHouse®" description: > - Schema 
migration tools for ClickHouse + Schema migration tools for ClickHouse® --- * [atlas](https://atlasgo.io) * [https://atlasgo.io/guides/clickhouse](https://atlasgo.io/guides/clickhouse) diff --git a/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/golang-migrate.md b/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/golang-migrate.md index c08be391ce..0ad71eab16 100644 --- a/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/golang-migrate.md +++ b/content/en/altinity-kb-setup-and-maintenance/schema-migration-tools/golang-migrate.md @@ -8,7 +8,7 @@ description: > `migrate` is a simple schema migration tool written in golang. No external dependencies are required (like interpreter, jre), only one platform-specific executable. [golang-migrate/migrate](https://github.com/golang-migrate/migrate) -`migrate` supports several databases, including ClickHouse (support was introduced by [@kshvakov](https://github.com/kshvakov)). +`migrate` supports several databases, including ClickHouse® (support was introduced by [@kshvakov](https://github.com/kshvakov)). To store information about migrations state `migrate` creates one additional table in target database, by default that table is called `schema_migrations`. 
diff --git a/content/en/altinity-kb-setup-and-maintenance/source-pars-size-is-greater-than-maximum.md b/content/en/altinity-kb-setup-and-maintenance/source-pars-size-is-greater-than-maximum.md index 8fefa762f9..caeefcd8c3 100644 --- a/content/en/altinity-kb-setup-and-maintenance/source-pars-size-is-greater-than-maximum.md +++ b/content/en/altinity-kb-setup-and-maintenance/source-pars-size-is-greater-than-maximum.md @@ -21,7 +21,7 @@ SELECT * FROM system.merges ``` That logic is needed to prevent picking a log of huge merges simultaneously -(otherwise they will take all available slots and clickhouse will not be +(otherwise they will take all available slots and ClickHouse® will not be able to do smaller merges, which usally are important for keeping the number of parts stable). @@ -39,4 +39,4 @@ it could be caused by: 3) high CPU load (not enough CPU power to catch up with merges) 4) issue with table schemas leading to high merges pressure (high / increased number of tables / partitions / etc.) -Start from checking dmesg / system journals / clickhouse monitoring to find the anomalies. +Start from checking dmesg / system journals / ClickHouse monitoring to find the anomalies. diff --git a/content/en/altinity-kb-setup-and-maintenance/sysall.md b/content/en/altinity-kb-setup-and-maintenance/sysall.md index a7e9b0bf12..7ac2bb28f5 100644 --- a/content/en/altinity-kb-setup-and-maintenance/sysall.md +++ b/content/en/altinity-kb-setup-and-maintenance/sysall.md @@ -10,7 +10,7 @@ description: >- The idea is that you have a macros `cluster` with cluster name. -For example you have a cluster named `production` and this cluster includes all ClickHouse nodes. +For example you have a cluster named `production` and this cluster includes all ClickHouse® nodes. 
```xml $ cat /etc/clickhouse-server/config.d/clusters.xml diff --git a/content/en/altinity-kb-setup-and-maintenance/uniq-uuid-doubled-clickhouse-upgrade.md b/content/en/altinity-kb-setup-and-maintenance/uniq-uuid-doubled-clickhouse-upgrade.md index 9ed44ab259..9c3391da36 100644 --- a/content/en/altinity-kb-setup-and-maintenance/uniq-uuid-doubled-clickhouse-upgrade.md +++ b/content/en/altinity-kb-setup-and-maintenance/uniq-uuid-doubled-clickhouse-upgrade.md @@ -1,14 +1,12 @@ --- -title: "AggregateFunction(uniq, UUID) doubled after ClickHouse upgrade" -linkTitle: "AggregateFunction(uniq, UUID) doubled after ClickHouse upgrade" +title: "AggregateFunction(uniq, UUID) doubled after ClickHouse® upgrade" +linkTitle: "AggregateFunction(uniq, UUID) doubled after ClickHouse® upgrade" weight: 100 -description: >- - Page description for heading and indexes. --- ## What happened -After ClickHouse upgrade from version pre 21.6 to version after 21.6, count of unique UUID in AggregatingMergeTree tables nearly doubled in case of merging of data which was generated in different ClickHouse versions. +After ClickHouse® upgrade from version pre 21.6 to version after 21.6, count of unique UUID in AggregatingMergeTree tables nearly doubled in case of merging of data which was generated in different ClickHouse versions. ## Why happened diff --git a/content/en/altinity-kb-setup-and-maintenance/uniqExact-to-uniq-combined.md b/content/en/altinity-kb-setup-and-maintenance/uniqExact-to-uniq-combined.md index 9a8c835bdc..5ff0a20927 100644 --- a/content/en/altinity-kb-setup-and-maintenance/uniqExact-to-uniq-combined.md +++ b/content/en/altinity-kb-setup-and-maintenance/uniqExact-to-uniq-combined.md @@ -3,7 +3,7 @@ title: "How to convert uniqExact states to approximate uniq functions states" linkTitle: "Convert uniqExact to uniq(Combined)" weight: 100 description: >- - A way to convert to uniqExactState to other uniqStates (like uniqCombinedState) in ClickHouse. 
+ A way to convert uniqExactState to other uniqStates (like uniqCombinedState) in ClickHouse® --- ## uniqExactState @@ -39,7 +39,7 @@ In case of `String` data type, it just the simple `sipHash128` function. ``` The second task: it needs to read a state and split it into an array of values. -Luckly for us, ClickHouse use the exact same serialization (`LEB128` + list of values) for Arrays (in this case if `uniqExactState` and `Array` are serialized into `RowBinary` format). +Luckily for us, ClickHouse® uses the exact same serialization (`LEB128` + list of values) for Arrays (in this case if `uniqExactState` and `Array` are serialized into `RowBinary` format). We need one a helper -- `UDF` function to do that conversion: diff --git a/content/en/altinity-kb-setup-and-maintenance/useful-setting-to-turn-on.md b/content/en/altinity-kb-setup-and-maintenance/useful-setting-to-turn-on.md index df95d394ef..469de6a134 100644 --- a/content/en/altinity-kb-setup-and-maintenance/useful-setting-to-turn-on.md +++ b/content/en/altinity-kb-setup-and-maintenance/useful-setting-to-turn-on.md @@ -14,7 +14,7 @@ Some setting that are not enabled by default. Enables or disables complete dropping of data parts where all rows are expired in MergeTree tables. -When ttl_only_drop_parts is disabled (by default), the ClickHouse server only deletes expired rows according to their TTL. +When ttl_only_drop_parts is disabled (by default), the ClickHouse® server only deletes expired rows according to their TTL. When ttl_only_drop_parts is enabled, the ClickHouse server drops a whole part when all rows in it are expired.
diff --git a/content/en/altinity-kb-setup-and-maintenance/who-ate-my-cpu.md b/content/en/altinity-kb-setup-and-maintenance/who-ate-my-cpu.md index f61a1371f7..88e8e33d30 100644 --- a/content/en/altinity-kb-setup-and-maintenance/who-ate-my-cpu.md +++ b/content/en/altinity-kb-setup-and-maintenance/who-ate-my-cpu.md @@ -3,7 +3,7 @@ title: "Who ate my CPU" linkTitle: "Who ate my CPU" weight: 100 description: >- - Queries to find which subsytem of ClickHouse is using the most of CPU. + Queries to find which subsystem of ClickHouse® is using the most CPU. --- ## Merges diff --git a/content/en/altinity-kb-setup-and-maintenance/zookeeper-session-expired.md b/content/en/altinity-kb-setup-and-maintenance/zookeeper-session-expired.md index e312d56919..e1b0b68ca6 100644 --- a/content/en/altinity-kb-setup-and-maintenance/zookeeper-session-expired.md +++ b/content/en/altinity-kb-setup-and-maintenance/zookeeper-session-expired.md @@ -1,12 +1,12 @@ --- -title: "ZooKeeper session has expired" -linkTitle: "ZooKeeper session has expired" +title: "Zookeeper session has expired" +linkTitle: "Zookeeper session has expired" weight: 100 description: >- - ZooKeeper session has expired. + Zookeeper session has expired --- -> **Q. I get "ZooKeeper session has expired" once. What should i do? Should I worry?** +> **Q. I get "Zookeeper session has expired" once. What should I do? Should I worry?** Getting exceptions or lack of acknowledgement in distributed system from time to time is a normal situation. Your client should do the retry. If that happened once and your client do retries correctly - nothing to worry about. @@ -14,58 +14,58 @@ Your client should do the retry. If that happened once and your client do retrie It it happens often, or with every retry - it may be a sign of some misconfiguration / issue in cluster (see below). -> **Q. we see a lot of these: ZooKeeper session has expired. Switching to a new session** +> **Q. we see a lot of these: Zookeeper session has expired.
Switching to a new session** -A. There is a single zookeeper session per server. But there are many threads that can use zookeeper simultaneously. -So the same event (we lose the single zookeeper session we had), will be reported by all the threads/queries which were using that zookeeper session. +A. There is a single Zookeeper session per server. But there are many threads that can use Zookeeper simultaneously. +So the same event (we lose the single Zookeeper session we had), will be reported by all the threads/queries which were using that Zookeeper session. -Usually after loosing the zookeeper session that exception is printed by all the thread which watch zookeeper replication queues, and all the threads which had some in-flight zookeeper operations (for example inserts, `ON CLUSTER` commands etc). +Usually after loosing the Zookeeper session that exception is printed by all the thread which watch Zookeeper replication queues, and all the threads which had some in-flight Zookeeper operations (for example inserts, `ON CLUSTER` commands etc). -If you see a lot of those simultaneously - that just means you have a lot of threads talking to zookeeper simultaneously (or may be you have many replicated tables?). +If you see a lot of those simultaneously - that just means you have a lot of threads talking to Zookeeper simultaneously (or may be you have many replicated tables?). BTW: every Replicated table comes with its own cost, so you [can't scale the number of replicated tables indefinitely](/altinity-kb-schema-design/how-much-is-too-much/#number-of-tables-system-wide-across-all-databases). -Typically after several hundreds (sometimes thousands) of replicated tables, the clickhouse server becomes unusable: it can't do any other work, but only keeping replication housekeeping tasks. 'ClickHouse-way' is to have a few (maybe dozens) of very huge tables instead of having thousands of tiny tables. 
(Side note: the number of not-replicated tables can be scaled much better). +Typically after several hundreds (sometimes thousands) of replicated tables, the ClickHouse® server becomes unusable: it can't do any other work, but only keeping replication housekeeping tasks. 'ClickHouse-way' is to have a few (maybe dozens) of very huge tables instead of having thousands of tiny tables. (Side note: the number of not-replicated tables can be scaled much better). So again if during short period of time you see lot of those exceptions and that don't happen anymore for a while - nothing to worry about. Just ensure your client is doing retries properly. > **Q. We are wondering what is causing that session to "timeout" as the default looks like 30 seconds, and there's certainly stuff happening much more frequently than every 30 seconds.** -Typically that has nothing with an expiration/timeout - even if you do nothing there are heartbeat events in the zookeeper protocol. +Typically that has nothing with an expiration/timeout - even if you do nothing there are heartbeat events in the Zookeeper protocol. -So internally inside clickhouse: -1) we have a 'zookeeper client' which in practice is a single zookeeper connection (TCP socket), with 2 threads - one serving reads, the seconds serving writes, and some API around. -2) while everything is ok zookeeper client keeps a single logical 'zookeeper session' (also by sending heartbeats etc). -3) we may have hundreds of 'users' of that zookeeper client - those are threads that do some housekeeping, serve queries etc. -4) zookeeper client normally have dozen 'in-flight' requests (asked by different threads). 
And if something bad happens with that -(disconnect, some issue with zookeeper server, some other failure), zookeeper client needs to re-establish the connection and switch to the new session +So internally inside ClickHouse: +1) we have a 'zookeeper client' which in practice is a single Zookeeper connection (TCP socket), with 2 threads - one serving reads, the seconds serving writes, and some API around. +2) while everything is ok Zookeeper client keeps a single logical 'zookeeper session' (also by sending heartbeats etc). +3) we may have hundreds of 'users' of that Zookeeper client - those are threads that do some housekeeping, serve queries etc. +4) Zookeeper client normally have dozen 'in-flight' requests (asked by different threads). And if something bad happens with that +(disconnect, some issue with Zookeeper server, some other failure), Zookeeper client needs to re-establish the connection and switch to the new session so all those 'in-flight' requests will be terminated with a 'session expired' exception. > **Q. That problem happens very often (all the time, every X minutes / hours / days).** -Sometimes the real issue can be visible somewhere close to the first 'session expired' exception in the log. (i.e. zookeeper client thread can +Sometimes the real issue can be visible somewhere close to the first 'session expired' exception in the log. (i.e. Zookeeper client thread can know & print to logs the real reason, while all 'user' threads just get 'session expired'). -Also zookeeper logs may ofter have a clue to that was the real problem. +Also Zookeeper logs may ofter have a clue to that was the real problem. -Known issues which can lead to session termination by zookeeper: +Known issues which can lead to session termination by Zookeeper: 1) connectivity / network issues. -2) `jute.maxbuffer` overrun. If you need to pass too much data in a single zookeeper transaction. 
(often happens if you need to do ALTER table UPDATE or other mutation on the table with big number of parts). The fix is adjusting JVM setting: -Djute.maxbuffer=8388608. See https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/jvm-sizes-and-garbage-collector-settings/ -3) XID overflow. XID is a transaction counter in zookeeper, if you do too many transactions the counter reaches maxint32, and to restart the counter zookeeper closes all the connections. Usually, that happens rarely, and is not avoidable in zookeeper (well in clickhouse-keeper that problem solved). There are some corner cases / some schemas which may end up with that XID overflow happening quite often. (a worst case we saw was once per 3 weeks). +2) `jute.maxbuffer` overrun. If you need to pass too much data in a single Zookeeper transaction. (often happens if you need to do ALTER table UPDATE or other mutation on the table with big number of parts). The fix is adjusting JVM setting: -Djute.maxbuffer=8388608. See https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/jvm-sizes-and-garbage-collector-settings/ +3) XID overflow. XID is a transaction counter in Zookeeper, if you do too many transactions the counter reaches maxint32, and to restart the counter Zookeeper closes all the connections. Usually, that happens rarely, and is not avoidable in Zookeeper (well in clickhouse-keeper that problem solved). There are some corner cases / some schemas which may end up with that XID overflow happening quite often. (a worst case we saw was once per 3 weeks). -> **Q. "ZooKeeper session has expired" happens every time I try to start the mutation / do other ALTER on Replicated table.** +> **Q. 
"Zookeeper session has expired" happens every time I try to start the mutation / do other ALTER on Replicated table.** -During ALTERing replicated table ClickHouse need to create a record in zookeeper listing all the parts which should be mutated (that usually means = list names of all parts of the table). If the size of list of parts exceeds maximum buffer size - zookeeper drops the connection. +During ALTERing replicated table ClickHouse need to create a record in Zookeeper listing all the parts which should be mutated (that usually means = list names of all parts of the table). If the size of list of parts exceeds maximum buffer size - Zookeeper drops the connection. Parts name length can be different for different tables. In average with default `jute.maxbuffer` (1Mb) mutations start to fail for tables which have more than 5000 parts. Solutions: 1) rethink partitioning, high number of parts in table is usually [not recommended](https://kb.altinity.com/altinity-kb-schema-design/how-much-is-too-much/#number-of-parts--partitions-system-wide-across-all-databases) -2) increase `jute.maxbuffer` on zookeeper side [to values about 8M](https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/jvm-sizes-and-garbage-collector-settings/) +2) increase `jute.maxbuffer` on Zookeeper side [to values about 8M](https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-zookeeper/jvm-sizes-and-garbage-collector-settings/) 3) use IN PARTITION clause for mutations (where applicable) - since [20.12](https://github.com/ClickHouse/ClickHouse/pull/13403) 4) switch to clickhouse-keeper -> **Q. "ZooKeeper session has expired and also Operation timeout" happens when reading blocks from Zookeeper**: +> **Q. "Zookeeper session has expired and also Operation timeout" happens when reading blocks from Zookeeper**: ```bash 2024.02.22 07:20:39.222171 [ 1047 ] {} ZooKeeperClient: Code: 999. 
Coordination::Exception: Operation timeout (no response) for request List for path: @@ -74,8 +74,8 @@ Solutions: Code: 999. Coordination::Exception: /clickhouse/tables/github_events/block_numbers/20240205105000 (Connection loss). ``` -Sometimes these `Session expired` and `operation timeout` are common, because of merges that read all the blocks in ZooKeeper for a table and if there are many blocks (and partitions) read time can be longer than the 10 secs default [operation timeout](https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings#server-settings_zookeeper). -When dropping a partition, ClickHouse never drops old block numbers from ZooKeeper, so the list grows indefinitely. It is done as a precaution against race between DROP PARTITION and INSERT. It is safe to clean those old blocks manually +Sometimes these `Session expired` and `operation timeout` are common, because of merges that read all the blocks in Zookeeper for a table and if there are many blocks (and partitions) read time can be longer than the 10 secs default [operation timeout](https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings#server-settings_zookeeper). +When dropping a partition, ClickHouse never drops old block numbers from Zookeeper, so the list grows indefinitely. It is done as a precaution against race between DROP PARTITION and INSERT. 
It is safe to clean those old blocks manually This is being addressed in **[#59507 Add `FORGET PARTITION` query to remove old partition nodes from](https://github.com/ClickHouse/ClickHouse/pull/59507)** diff --git a/content/en/altinity-kb-useful-queries/_index.md b/content/en/altinity-kb-useful-queries/_index.md index 1e8a336e1f..984639bd84 100644 --- a/content/en/altinity-kb-useful-queries/_index.md +++ b/content/en/altinity-kb-useful-queries/_index.md @@ -5,6 +5,6 @@ keywords: - clickhouse queries - clickhouse datasets description: > - Access useful ClickHouse queries, from finding database size, missing blocks, checking table metadata in Zookeeper, and more. + Access useful ClickHouse® queries, from finding database size, missing blocks, checking table metadata in Zookeeper, and more. weight: 6 --- diff --git a/content/en/altinity-kb-useful-queries/altinity-kb-number-of-active-parts-in-a-partition.md b/content/en/altinity-kb-useful-queries/altinity-kb-number-of-active-parts-in-a-partition.md index 184893b938..f8107d0909 100644 --- a/content/en/altinity-kb-useful-queries/altinity-kb-number-of-active-parts-in-a-partition.md +++ b/content/en/altinity-kb-useful-queries/altinity-kb-number-of-active-parts-in-a-partition.md @@ -4,7 +4,7 @@ linkTitle: "Number of active parts in a partition" description: > Number of active parts in a partition --- -## Q: Why do I have several active parts in a partition? Why ClickHouse does not merge them immediately? +## Q: Why do I have several active parts in a partition? Why ClickHouse® does not merge them immediately? 
### A: CH does not merge parts by time diff --git a/content/en/altinity-kb-useful-queries/debug-hang.md b/content/en/altinity-kb-useful-queries/debug-hang.md index ca546df103..bc847f9757 100644 --- a/content/en/altinity-kb-useful-queries/debug-hang.md +++ b/content/en/altinity-kb-useful-queries/debug-hang.md @@ -8,7 +8,7 @@ description: >- ## Debug hanging / freezing things -If ClickHouse is busy with something and you don't know what's happeing, you can easily check the stacktraces of all the thread which are working +If ClickHouse® is busy with something and you don't know what's happening, you can easily check the stacktraces of all the thread which are working ```sql SELECT diff --git a/content/en/altinity-kb-useful-queries/detached-parts.md b/content/en/altinity-kb-useful-queries/detached-parts.md index e7221629df..f10cef3491 100644 --- a/content/en/altinity-kb-useful-queries/detached-parts.md +++ b/content/en/altinity-kb-useful-queries/detached-parts.md @@ -7,7 +7,7 @@ description: > Here is what different statuses mean: 1. Parts are renamed to 'ignored' if they were found during ATTACH together with other, bigger parts that cover the same blocks of data, i.e. they were already merged into something else. -2. parts are renamed to 'broken' if ClickHouse was not able to load data from the parts. There could be different reasons: some files are lost, checksums are not correct, etc. +2. parts are renamed to 'broken' if ClickHouse® was not able to load data from the parts. There could be different reasons: some files are lost, checksums are not correct, etc. 3. parts are renamed to 'unexpected' if they are present locally, but are not found in ZooKeeper, in case when an insert was not completed properly. The part is detached only if it's old enough (5 minutes), otherwise CH registers this part in ZooKeeper as a new part. 4. 
parts are renamed to 'cloned' if ClickHouse have had some parts on local disk while repairing lost replica so already existed parts being renamed and put in detached directory. Controlled by setting `detach_old_local_parts_when_cloning_replica`. @@ -44,5 +44,5 @@ merge-not-byte-identical mutate-not-byte-identical - broken-on-start clone -covered-by-broken - that means that clickhouse during initialization of replicated table detected that some part is not ok, and decided to refetch it from healthy replicas. So the part itself will be detached as 'broken' and if that part was a result of merge / mutation all the previuos generations of that will be marked as covered-by-broken. If clickhouse was able to download the final part you don't need those covered-by-broken. +covered-by-broken - that means that ClickHouse during initialization of replicated table detected that some part is not ok, and decided to refetch it from healthy replicas. So the part itself will be detached as 'broken' and if that part was a result of merge / mutation all the previuos generations of that will be marked as covered-by-broken. If clickhouse was able to download the final part you don't need those covered-by-broken. 
``` diff --git a/content/en/altinity-kb-useful-queries/query_log.md b/content/en/altinity-kb-useful-queries/query_log.md index 5cde20648e..5b89092d93 100644 --- a/content/en/altinity-kb-useful-queries/query_log.md +++ b/content/en/altinity-kb-useful-queries/query_log.md @@ -38,9 +38,11 @@ GROUP BY normalized_query_hash ORDER BY UserTime DESC LIMIT 30 FORMAT Vertical +``` +-- modern ClickHouse® --- modern ClickHouse +```sql SELECT hostName() as host, normalized_query_hash, diff --git a/content/en/upgrade/removing-empty-parts.md b/content/en/upgrade/removing-empty-parts.md index a683ffb09f..931704c324 100644 --- a/content/en/upgrade/removing-empty-parts.md +++ b/content/en/upgrade/removing-empty-parts.md @@ -4,7 +4,7 @@ linkTitle: "Removing empty parts" description: > Removing empty parts --- -Removing of empty parts is a new feature introduced in 20.12. +Removing of empty parts is a new feature introduced in ClickHouse® 20.12. Earlier versions leave empty parts (with 0 rows) if TTL removes all rows from a part ([https://github.com/ClickHouse/ClickHouse/issues/5491](https://github.com/ClickHouse/ClickHouse/issues/5491)). If you set up TTL for your data it is likely that there are quite many empty parts in your system. diff --git a/content/en/upgrade/removing-lost-parts.md b/content/en/upgrade/removing-lost-parts.md index 7cf6dfd4d1..d9a58b2f05 100644 --- a/content/en/upgrade/removing-lost-parts.md +++ b/content/en/upgrade/removing-lost-parts.md @@ -9,7 +9,7 @@ description: > The explanation is here https://github.com/ClickHouse/ClickHouse/pull/26716 -The problem is introduced in 20.1. +The problem is introduced in ClickHouse® 20.1. The problem is fixed in 21.8 and backported to 21.3.16, 21.6.9, 21.7.6. 
diff --git a/content/en/upgrade/vulnerabilities.md b/content/en/upgrade/vulnerabilities.md index a40a05905e..5feff1eff6 100644 --- a/content/en/upgrade/vulnerabilities.md +++ b/content/en/upgrade/vulnerabilities.md @@ -6,7 +6,7 @@ description: >- Vulnerabilities --- -## 2022-03-15: 7 vulnerabulities in ClickHouse were published. +## 2022-03-15: 7 vulnerabulities in ClickHouse® were published. See the details https://jfrog.com/blog/7-rce-and-dos-vulnerabilities-found-in-clickhouse-dbms/ diff --git a/content/en/using-this-knowledgebase/_index.md b/content/en/using-this-knowledgebase/_index.md index be5ed418ed..7e91310489 100644 --- a/content/en/using-this-knowledgebase/_index.md +++ b/content/en/using-this-knowledgebase/_index.md @@ -5,7 +5,7 @@ keywords: - clickhouse updates - clickhouse contributions description: > - Add pages, make updates, and contribute to this ClickHouse knowledge base. + Add pages, make updates, and contribute to this ClickHouse® knowledge base. weight: 12 --- The Altinity Knowledge Base is built on GitHub Pages, using Hugo and Docsy. This guide provides a brief description on how to make updates and add to this knowledge base. From 4e750015c729dc88dca52c8a670f34be6fec66fc Mon Sep 17 00:00:00 2001 From: Doug Tidwell Date: Tue, 30 Jul 2024 22:14:30 -0400 Subject: [PATCH 15/17] Site cleanup, mostly minor changes --- .../async-inserts.md | 140 ++++++++++-------- 1 file changed, 76 insertions(+), 64 deletions(-) diff --git a/content/en/altinity-kb-queries-and-syntax/async-inserts.md b/content/en/altinity-kb-queries-and-syntax/async-inserts.md index f9fc18ab6f..40ed122416 100644 --- a/content/en/altinity-kb-queries-and-syntax/async-inserts.md +++ b/content/en/altinity-kb-queries-and-syntax/async-inserts.md @@ -2,91 +2,103 @@ title: "Async INSERTs" linkTitle: "Async INSERTs" description: > - Async INSERTs + Async INSERTs --- -Async INSERTs is a ClickHouse® feature tha enables batching data automatically and transparently on the server-side. 
Although async inserts work, they still have issues, but have been improved in latest versions. We recommend to batch at app/ingestor level because you will have more control and you decouple this responsibility from ClickHouse. That being said, there are some insights about Async inserts you should now:
+Async INSERTs is a ClickHouse® feature that enables batching data automatically and transparently on the server-side. We recommend to batch at app/ingestor level because you will have more control and you decouple this responsibility from ClickHouse, but there are use cases where this is not possible and Async inserts come in handy if you have hundreds or thousands of clients doing small inserts.
+
+You can check how they work here: [Async inserts](https://clickhouse.com/docs/en/optimize/asynchronous-inserts)
+
+Some insights about Async inserts you should know:
 
 * Async inserts give acknowledgment immediately after the data got inserted into the buffer (wait_for_async_insert = 0) or by default, after the data got written to a part after flushing from buffer (wait_for_async_insert = 1).
-* INSERT .. SELECT is NOT async insert. (You can use matView + Null table OR ephemeral columns instead of INPUT function, then ASYNC insert work)
+* `INSERT .. SELECT` is NOT async insert. (You can use matView + Null table OR ephemeral columns instead of INPUT function so Async inserts will work)
 * Async inserts will do (idempotent) retries.
-* Async inserts can collect data for some offline remote clusters: Yandex self-driving cars were collecting the metrics data during the ride into ClickHouse installed on the car computer to a distributed table with Async inserts enabled, which were flushed to the cluster once the car was plugged to the network.
 * Async inserts can do batching, so multiple inserts can be squashed as a single insert (but in that case, retries are not idempotent anymore).
-* Async inserts can lose your data in case of sudden restart (no fsyncs by default).
-* Async inserted data becomes available for selects not immediately after acknowledgment.
-* Async inserts generally have more `moving parts` there are some background threads monitoring new data to be sent and pushing it out.
-* Async inserts require extra monitoring from different system.tables (see `system.part_log`, `system.query_log` and `system.asynchronous_inserts` for 22.8). Previously such queries didn't appear in the query log. Check: [#33239](https://github.com/ClickHouse/ClickHouse/pull/33239).
 * Important to use `wait_for_async_insert = 1` because with any error you will loose data without knowing it. For example your table is read only -> losing data, out of disk space -> losing data, too many parts -> losing data.
+* If `wait_for_async_insert = 0`:
+    * Async inserts can lose your data in case of sudden restart (no fsyncs by default).
+    * Async inserted data becomes available for selects not immediately after acknowledgment.
+    * Async insert is fast sending ACK to clients unblocking them, because they have to wait until ACK is received. If your use case can handle data loss, you can use `wait_for_async_insert = 0` it will increase the throughput.
+* Async inserts generally have more `moving parts` there are some background threads monitoring new data to be sent and pushing it out.
+* Async inserts require extra monitoring from different system.tables (see `system.part_log`, `system.query_log`, `system.asynchronous_inserts` and `system.asynchronous_insert_log`).
+
+# features / improvements
+
+* Async insert dedup: Support block deduplication for asynchronous inserts. Before this change, async inserts did not support deduplication, because multiple small inserts coexisted in one inserted batch:
+    - [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075)
+    - [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304)
+* Added system table `asynchronous_insert_log`. It contains information about asynchronous inserts (including results of queries in fire-and-forget mode. (with wait_for_async_insert=0)) for better introspection [#42040](https://github.com/ClickHouse/ClickHouse/pull/42040)
+* Support async inserts in **clickhouse-client** for queries with inlined data **(Native protocol)**:
+    - [#34267](https://github.com/ClickHouse/ClickHouse/pull/34267)
+    - [#54098](https://github.com/ClickHouse/ClickHouse/issues/54098)
+    - [#54381](https://github.com/ClickHouse/ClickHouse/issues/54381)
+* Async insert backpressure [#47623](https://github.com/ClickHouse/ClickHouse/issues/47623)
+* Limit the deduplication overhead when using `async_insert_deduplicate` [#46549](https://github.com/ClickHouse/ClickHouse/pull/46549)
+* `SYSTEM FLUSH ASYNC INSERTS` [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160)
+* Adjustable asynchronous insert timeouts [#58486](https://github.com/ClickHouse/ClickHouse/pull/58486)
 
-## 22.10+ bugfixes/features
-
-* Fixed bug which could lead to deadlock while using asynchronous inserts. See [#43233](https://github.com/ClickHouse/ClickHouse/pull/43233).
-* Async insert dedup: Support block deduplication for asynchronous inserts. Before this change, async inserts did not support deduplication, because multiple small inserts coexisted in one inserted batch. See [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075) and [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304).
-* Added system table `asynchronous_insert_log`. It contains information about asynchronous inserts (including results of queries in fire-and-forget mode. (with wait_for_async_insert=0)) for better introspection. See [#42040](https://github.com/ClickHouse/ClickHouse/pull/42040).
-* Support async inserts in **clickhouse-client** for queries with inlined data **(Native protocol)**.
- - Check: - - [#34267](https://github.com/ClickHouse/ClickHouse/pull/34267) - - [#54098](https://github.com/ClickHouse/ClickHouse/issues/54098). - - Also new feature opened by @alexey-milovidov to use async inserts with prepared blocks like a normal insert [#54381](https://github.com/ClickHouse/ClickHouse/issues/54381) -* Async insert backpressure: - - **[#47623 Back pressure for asynchronous inserts](https://github.com/ClickHouse/ClickHouse/issues/47623)** -- In order to limit the deduplication overhead when using `async_insert_deduplicate`, clickhouse writes lots of keys to keeper, and it's easy to exceed the txn limitation. So the setting `async_insert_max_query_number` is added to limit the number of async inserts in a block. This will impact on the throughput of async inserts, so this setting should not considered when duplication is disabled: `async_insert_deduplicate = 0` - - **[#46549 enable async-insert-max-query-number only if async_insert_deduplicate](https://github.com/ClickHouse/ClickHouse/pull/46549)** -- SYSTEM FLUSH ASYNC INSERTS - - **[#49160 Allow to flush asynchronous insert queue](https://github.com/ClickHouse/ClickHouse/pull/49160)** -- Fix crash when async inserts with deduplication are used for ReplicatedMergeTree tables using a nondefault merging algorithm - - **[Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms #51676](https://github.com/ClickHouse/ClickHouse/pull/51676)** -- Async inserts not working with log_comment setting: - - **[Async inserts dont work if people is using log_comment setting with different values](https://github.com/ClickHouse/ClickHouse/issues/48430)** -- Fix misbehaviour with async inserts - - **[Correctly disable async insert with deduplication when its not needed #50663](https://github.com/ClickHouse/ClickHouse/pull/50663)** - -## To improve observability / introspection +## bugfixes + +- Fixed bug which could lead to deadlock while using asynchronous inserts 
[#43233](https://github.com/ClickHouse/ClickHouse/pull/43233). +- Fix crash when async inserts with deduplication are used for ReplicatedMergeTree tables using a nondefault merging algorithm [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) +- Async inserts not working with log_comment setting [48430](https://github.com/ClickHouse/ClickHouse/issues/48430) +- Fix misbehaviour with async inserts with deduplication [#50663](https://github.com/ClickHouse/ClickHouse/pull/50663) +- Reject Insert if `async_insert=1` and `deduplicate_blocks_in_dependent_materialized_views=1`[#60888](https://github.com/ClickHouse/ClickHouse/pull/60888) +- Disable `async_insert_use_adaptive_busy_timeout` correctly with compatibility settings [#61486](https://github.com/ClickHouse/ClickHouse/pull/61468) + + +## observability / introspection In 22.x versions, it is not possible to relate `part_log/query_id` column with `asynchronous_insert_log/query_id` column. We need to use `query_log/query_id`: `asynchronous_insert_log` shows up the `query_id` and `flush_query_id` of each async insert. The `query_id` from `asynchronous_insert_log` shows up in the `system.query_log` as `type = 'QueryStart'` but the same `query_id` does not show up in the `query_id` column of the `system.part_log`. Because the `query_id` column in the `part_log` is the identifier of the INSERT query that created a data part, and it seems it is for sync INSERTS but not for async inserts. -So in `asynchronous_inserts` table you can check the current batch that still has not been flushed. In the `asynchronous_insert_log` you can find a log of all the async inserts executed. +So in `asynchronous_inserts` table you can check the current batch that still has not been flushed. In the `asynchronous_insert_log` you can find a log of all the flushed async inserts. 
+ +This has been improved in **ClickHouse 23.7** Flush queries for async inserts (the queries that do the final push of data) are now logged in the `system.query_log` where they appear as `query_kind = 'AsyncInsertFlush'` [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) + + +## Versions -But in **ClickHouse 23.7** Flush queries for async inserts (the queries that do the final push of data) are now logged in the `system.query_log` where they appear as `query_kind = 'AsyncInsertFlush'`. -- **[Log async insert flush queries into to system.query_log and system.processes #51160](https://github.com/ClickHouse/ClickHouse/pull/51160)** +- **23.8** is a good version to start using async inserts because of the improvements and bugfixes. +- **24.3** the new adaptative timeout mechanism has been added so clickhouse will throttle the inserts based on the server load.[#58486](https://github.com/ClickHouse/ClickHouse/pull/58486) ## Metrics ```sql SELECT name FROM system.columns -WHERE (table = 'metric_log') AND (name ILIKE '%Async%') - -Query id: 3d0b7cbc-7990-4498-9c18-1c988796c487 - -┌─name────────────────────────────────────────────────┐ -│ ProfileEvent_AsyncInsertQuery │ -│ ProfileEvent_AsyncInsertBytes │ -│ ProfileEvent_AsyncInsertCacheHits │ -│ ProfileEvent_FailedAsyncInsertQuery │ -│ ProfileEvent_AsynchronousReadWaitMicroseconds │ -│ ProfileEvent_AsynchronousRemoteReadWaitMicroseconds │ -│ CurrentMetric_DiskObjectStorageAsyncThreads │ -│ CurrentMetric_DiskObjectStorageAsyncThreadsActive │ -│ CurrentMetric_AsynchronousInsertThreads │ -│ CurrentMetric_AsynchronousInsertThreadsActive │part -│ CurrentMetric_AsynchronousReadWait │ -│ CurrentMetric_PendingAsyncInsert │ -│ CurrentMetric_AsyncInsertCacheSize │ -└─────────────────────────────────────────────────────┘ +WHERE (`table` = 'metric_log') AND ((name ILIKE '%asyncinsert%') OR (name ILIKE '%asynchronousinsert%')) + +┌─name─────────────────────────────────────────────┐ +│ ProfileEvent_AsyncInsertQuery │ +│ 
ProfileEvent_AsyncInsertBytes │ +│ ProfileEvent_AsyncInsertRows │ +│ ProfileEvent_AsyncInsertCacheHits │ +│ ProfileEvent_FailedAsyncInsertQuery │ +│ ProfileEvent_DistributedAsyncInsertionFailures │ +│ CurrentMetric_AsynchronousInsertThreads │ +│ CurrentMetric_AsynchronousInsertThreadsActive │ +│ CurrentMetric_AsynchronousInsertThreadsScheduled │ +│ CurrentMetric_AsynchronousInsertQueueSize │ +│ CurrentMetric_AsynchronousInsertQueueBytes │ +│ CurrentMetric_PendingAsyncInsert │ +│ CurrentMetric_AsyncInsertCacheSize │ +└──────────────────────────────────────────────────┘ SELECT * FROM system.metrics -WHERE metric ILIKE '%async%' - -┌─metric──────────────────────────────┬─value─┬─description──────────────────────────────────────────────────────────────────────┐ -│ AsynchronousInsertThreads │ 0 │ Number of threads in the AsynchronousInsert thread pool. │ -│ AsynchronousInsertThreadsActive │ 0 │ Number of threads in the AsynchronousInsert thread pool running a task. │ -│ AsynchronousReadWait │ 0 │ Number of threads waiting for asynchronous read. │ -│ PendingAsyncInsert │ 0 │ Number of asynchronous inserts that are waiting for flush. │ -│ AsyncInsertCacheSize │ 0 │ Number of async insert hash id in cache │ -└─────────────────────────────────────┴───────┴──────────────────────────────────────────────────────────────────────────────────┘ -``` +WHERE (metric ILIKE '%asyncinsert%') OR (metric ILIKE '%asynchronousinsert%') + +┌─metric─────────────────────────────┬─value─┬─description─────────────────────────────────────────────────────────────┐ +│ AsynchronousInsertThreads │ 1 │ Number of threads in the AsynchronousInsert thread pool. │ +│ AsynchronousInsertThreadsActive │ 0 │ Number of threads in the AsynchronousInsert thread pool running a task. │ +│ AsynchronousInsertThreadsScheduled │ 0 │ Number of queued or active jobs in the AsynchronousInsert thread pool. │ +│ AsynchronousInsertQueueSize │ 1 │ Number of pending tasks in the AsynchronousInsert queue. 
│ +│ AsynchronousInsertQueueBytes │ 680 │ Number of pending bytes in the AsynchronousInsert queue. │ +│ PendingAsyncInsert │ 7 │ Number of asynchronous inserts that are waiting for flush. │ +│ AsyncInsertCacheSize │ 0 │ Number of async insert hash id in cache │ +└────────────────────────────────────┴───────┴─────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file From a8e3bcb120604111d57c458259727ac65dc0b958 Mon Sep 17 00:00:00 2001 From: Doug Tidwell Date: Tue, 30 Jul 2024 22:34:26 -0400 Subject: [PATCH 16/17] Site cleanup, mostly minor changes --- .../ClickHouse_python_drivers.md | 10 +++++----- .../en/altinity-kb-queries-and-syntax/async-inserts.md | 2 +- .../altinity-kb-data-migration/_index.md | 6 +++--- .../clickhouse-backup-diff.md | 2 +- .../clickhouse-operator.md | 6 +++--- .../hardening-clickhouse-security.md | 6 +++--- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md b/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md index 2775ecbfb2..6e339d42cf 100644 --- a/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md +++ b/content/en/altinity-kb-integrations/ClickHouse_python_drivers.md @@ -1,12 +1,12 @@ --- -title: "ClickHouse python drivers" -linkTitle: "ClickHouse python drivers" +title: "ClickHouse® python drivers" +linkTitle: "ClickHouse® python drivers" weight: 100 description: >- - Python main drivers/clients for ClickHouse + Python main drivers/clients for ClickHouse® --- -## ClickHouse python drivers +## ClickHouse® python drivers There are two main python drivers that can be used with ClickHouse. 
They all have their different set of features and use cases: @@ -32,7 +32,7 @@ The **`clickhouse-driver`** is a Python library used for interacting with ClickH - Good Pandas/Numpy support: [https://clickhouse-driver.readthedocs.io/en/latest/features.html#numpy-pandas-support](https://clickhouse-driver.readthedocs.io/en/latest/features.html#numpy-pandas-support) - Good SQLALchemy support: [https://pypi.org/project/clickhouse-sqlalchemy/](https://pypi.org/project/clickhouse-sqlalchemy/) -This was the first python driver for clickhouse. It has a mature codebase. By default clickhouse drivers uses [synchronous code](https://clickhouse-driver.readthedocs.io/en/latest/quickstart.html#async-and-multithreading). There is a wrapper to convert code to asynchronous, [https://github.com/long2ice/asynch](https://github.com/long2ice/asynch) +This was the first python driver for ClickHouse. It has a mature codebase. By default ClickHouse drivers uses [synchronous code](https://clickhouse-driver.readthedocs.io/en/latest/quickstart.html#async-and-multithreading). There is a wrapper to convert code to asynchronous, [https://github.com/long2ice/asynch](https://github.com/long2ice/asynch) Here you can get a basic working example from Altinity repo for ingestion/selection using clickhouse-driver: diff --git a/content/en/altinity-kb-queries-and-syntax/async-inserts.md b/content/en/altinity-kb-queries-and-syntax/async-inserts.md index 6d9c23a9e0..f3f47a714e 100644 --- a/content/en/altinity-kb-queries-and-syntax/async-inserts.md +++ b/content/en/altinity-kb-queries-and-syntax/async-inserts.md @@ -63,7 +63,7 @@ This has been improved in **ClickHouse 23.7** Flush queries for async inserts (t ## Versions - **23.8** is a good version to start using async inserts because of the improvements and bugfixes. 
-- **24.3** the new adaptative timeout mechanism has been added so clickhouse will throttle the inserts based on the server load.[#58486](https://github.com/ClickHouse/ClickHouse/pull/58486) +- **24.3** the new adaptative timeout mechanism has been added so ClickHouse will throttle the inserts based on the server load.[#58486](https://github.com/ClickHouse/ClickHouse/pull/58486) ## Metrics diff --git a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md index c9d304bf88..4836eab2d5 100644 --- a/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md +++ b/content/en/altinity-kb-setup-and-maintenance/altinity-kb-data-migration/_index.md @@ -11,7 +11,7 @@ Pros: Cons: * Decoding & encoding of common data formats may be slower / require more CPU -* The data size is usually bigger than ClickHouse formats. +* The data size is usually bigger than ClickHouse® formats. * Some of the common data formats have limitations. {{% alert title="Info" color="info" %}} @@ -56,7 +56,7 @@ Internally it works like smart `INSERT INTO cluster(…) SELECT * FROM ...` with {{% /alert %}} {{% alert title="Info" color="info" %}} -Run clickhouse copier on the same nodes as receiver clickhouse, to avoid doubling the network load. +Run `clickhouse-copier` on the same nodes as receiver ClickHouse, to avoid doubling the network load. {{% /alert %}} See details in: @@ -102,7 +102,7 @@ Pros: Cons: * Table schema should be the same. 
-* Works only when the source and the destination clickhouse servers share the same zookeeper (without chroot) +* Works only when the source and the destination ClickHouse servers share the same zookeeper (without chroot) * Needs to access zookeeper and ClickHouse replication ports: (`interserver_http_port` or `interserver_https_port`) ```sql diff --git a/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup-diff.md b/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup-diff.md index 2a94414f59..d1d608cbba 100644 --- a/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup-diff.md +++ b/content/en/altinity-kb-setup-and-maintenance/clickhouse-backup-diff.md @@ -6,7 +6,7 @@ description: > --- ### differential backups using clickhouse-backup -1. Download the latest clickhouse-backup for your platform https://github.com/Altinity/clickhouse-backup/releases +1. Download the latest version of Altinity Backup for ClickHouse®: https://github.com/Altinity/clickhouse-backup/releases ```bash # ubuntu / debian diff --git a/content/en/altinity-kb-setup-and-maintenance/clickhouse-operator.md b/content/en/altinity-kb-setup-and-maintenance/clickhouse-operator.md index 71a3dec9c5..9e77b43baa 100644 --- a/content/en/altinity-kb-setup-and-maintenance/clickhouse-operator.md +++ b/content/en/altinity-kb-setup-and-maintenance/clickhouse-operator.md @@ -1,11 +1,11 @@ --- -title: "Altinity Kubernetes Operator For ClickHouse" +title: "Altinity Kubernetes Operator For ClickHouse®" linkTitle: "ClickHouse operator" weight: 100 description: >- - Altinity Kubernetes Operator For ClickHouse + Altinity Kubernetes Operator For ClickHouse® --- -## Altinity Operator Documentation +## Altinity Kubernetes Operator for ClickHouse® Documentation https://github.com/Altinity/clickhouse-operator/blob/master/docs/README.md diff --git a/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardening-clickhouse-security.md 
b/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardening-clickhouse-security.md index 5f58440bdc..9ed93ecf72 100644 --- a/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardening-clickhouse-security.md +++ b/content/en/altinity-kb-setup-and-maintenance/cluster-production-configuration-guide/hardening-clickhouse-security.md @@ -6,15 +6,15 @@ description: > --- -ClickHouse is currently at the design stage of creating some universal backup solution. Some custom backup strategies are: +ClickHouse® is currently at the design stage of creating some universal backup solution. Some custom backup strategies are: 1. Each shard is backed up separately. 2. FREEZE the table/partition. For more information, see [Alter Freeze Partition](https://clickhouse.tech/docs/en/sql-reference/statements/alter/partition/#alter_freeze-partition). 1. This creates hard links in shadow subdirectory. 3. rsync that directory to a backup location, then remove that subfolder from shadow. 1. Cloud users are recommended to use [Rclone](https://rclone.org/). -4. Always add the full contents of the metadata subfolder that contains the current DB schema and clickhouse configs to your backup. +4. Always add the full contents of the metadata subfolder that contains the current DB schema and ClickHouse configs to your backup. 5. For a second replica, it’s enough to copy metadata and configuration. -6. Data in clickhouse is already compressed with lz4, backup can be compressed bit better, but avoid using cpu-heavy compression algorythms like gzip, use something like zstd instead. +6. Data in ClickHouse is already compressed with lz4, backup can be compressed bit better, but avoid using cpu-heavy compression algorythms like gzip, use something like zstd instead. The tool automating that process [clickhouse-backup](https://github.com/Altinity/clickhouse-backup). 
From 8540100c34e15e759892e18a4969af323d485243 Mon Sep 17 00:00:00 2001 From: Doug Tidwell Date: Wed, 31 Jul 2024 09:13:23 -0400 Subject: [PATCH 17/17] Updated right nav legal statement --- layouts/partials/toc.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/layouts/partials/toc.html b/layouts/partials/toc.html index 63124da2b5..4941b2bf19 100644 --- a/layouts/partials/toc.html +++ b/layouts/partials/toc.html @@ -8,5 +8,5 @@ {{ partial "social-links.html" . }}
- Altinity®, Altinity.Cloud®, and Altinity Stable® are registered trademarks of Altinity, Inc. ClickHouse® is a registered trademark of ClickHouse, Inc.; Altinity is not affiliated with or associated with ClickHouse, Inc. Kafka, Kubernetes, MySQL, and PostgreSQL are trademarks and property of their respective owners. + Altinity®, Altinity.Cloud®, and Altinity Stable® are registered trademarks of Altinity, Inc. ClickHouse® is a registered trademark of ClickHouse, Inc.; Altinity is not affiliated with or associated with ClickHouse, Inc.
external tool support -
(like clickhouse-backup)
good / mature limited / beta