From fc8ac1821bdd434048f8d5a36fe3f37880e18649 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Mon, 18 Mar 2024 22:40:26 +0900 Subject: [PATCH] Default merge concurrency (#4755) * Changed the default of `default_merge_concurrency` to `2 * num_cpus / 3` We have observed at least one case (rather extreme however) where the default settings did not make it possible for merging to keep up with indexing. * Added documentation for indexer's cpu_capacity Closes #4716 --- docs/configuration/node-config.md | 2 ++ quickwit/quickwit-config/src/node_config/mod.rs | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/configuration/node-config.md b/docs/configuration/node-config.md index ec8f50c7b5a..0e6e2e4d739 100644 --- a/docs/configuration/node-config.md +++ b/docs/configuration/node-config.md @@ -166,7 +166,9 @@ This section contains the configuration options for an indexer. The split store | `split_store_max_num_bytes` | Maximum size in bytes allowed in the split store for each index-source pair. | `100G` | | `split_store_max_num_splits` | Maximum number of files allowed in the split store for each index-source pair. | `1000` | | `max_concurrent_split_uploads` | Maximum number of concurrent split uploads allowed on the node. | `12` | +| `merge_concurrency` | Maximum number of merge operations that can be executed on the node at one point in time. | `(2 x num threads available) / 3` | | `enable_otlp_endpoint` | If true, enables the OpenTelemetry exporter endpoint to ingest logs and traces via the OpenTelemetry Protocol (OTLP). | `false` | +| `cpu_capacity` | Advisory parameter used by the control plane. The value can be expressed in threads (e.g. `2`) or in terms of millicpus (`2000m`). The control plane will attempt to schedule indexing pipelines on the different nodes proportionally to the cpu capacity advertised by the indexer. It is NOT used as a limit. All pipelines will be scheduled regardless of whether the cluster has sufficient capacity or not. 
The control plane does not attempt to spread the work equally when the load is well below the `cpu_capacity`. Users who need a balanced load on all of their indexer nodes can set the `cpu_capacity` to an arbitrarily low value as long as they keep it proportional to the number of threads available. | `num threads available` | Example: diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs index 839d760ade1..12473933c7e 100644 --- a/quickwit/quickwit-config/src/node_config/mod.rs +++ b/quickwit/quickwit-config/src/node_config/mod.rs @@ -139,7 +139,7 @@ impl IndexerConfig { } pub fn default_merge_concurrency() -> NonZeroUsize { - NonZeroUsize::new(num_cpus::get() / 2).unwrap_or(NonZeroUsize::new(1).unwrap()) + NonZeroUsize::new(num_cpus::get() * 2 / 3).unwrap_or(NonZeroUsize::new(1).unwrap()) } fn default_cpu_capacity() -> CpuCapacity {