From 7ce94ca3ff2ffa4ba464bf9c9d9c2cc9d2b276b7 Mon Sep 17 00:00:00 2001 From: "A. Stoewer" Date: Tue, 14 Nov 2023 10:20:19 +1000 Subject: [PATCH] Make vParquet3 the default encoding (#3134) * Make vParquet3 the default encoding * Mention vParquet3 as defaults in Tempo's documentation * Regenerate manifest.md * Add to CHANGELOG.md * Apply suggested documentation improvements Co-authored-by: Kim Nylander <104772500+knylander-grafana@users.noreply.github.com> --- CHANGELOG.md | 1 + cmd/tempo/app/config_test.go | 4 +-- docs/sources/tempo/configuration/_index.md | 2 +- docs/sources/tempo/configuration/manifest.md | 26 +++++++++++++------- docs/sources/tempo/configuration/parquet.md | 17 ++++++------- tempodb/encoding/versioned.go | 2 +- 6 files changed, 30 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7101ea813cb..39a5c97eccc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ * [ENHANCEMENT] Introduced `AttributePolicyMatch` & `IntrinsicPolicyMatch` structures to match span attributes based on strongly typed values & precompiled regexp [#3025](https://github.com/grafana/tempo/pull/3025) (@andriusluk) * [CHANGE] TraceQL/Structural operators performance improvement. [#3088](https://github.com/grafana/tempo/pull/3088) (@joe-elliott) * [CHANGE] Merge the processors overrides set through runtime overrides and user-configurable overrides [#3125](https://github.com/grafana/tempo/pull/3125) (@kvrhdn) +* [CHANGE] Make vParquet3 the default block encoding [#3134](https://github.com/grafana/tempo/pull/3134) (@stoewer) * [FEATURE] Introduce list_blocks_concurrency on GCS and S3 backends to control backend load and performance. [#2652](https://github.com/grafana/tempo/pull/2652) (@zalegrala) * [BUGFIX] Include statusMessage intrinsic attribute in tag search. [#3084](https://github.com/grafana/tempo/pull/3084) (@rcrowe) * [ENHANCEMENT] Update poller to make use of previous results and reduce backend load. 
[#2652](https://github.com/grafana/tempo/pull/2652) (@zalegrala) diff --git a/cmd/tempo/app/config_test.go b/cmd/tempo/app/config_test.go index ff714ddb767..31d26221052 100644 --- a/cmd/tempo/app/config_test.go +++ b/cmd/tempo/app/config_test.go @@ -13,7 +13,7 @@ import ( "github.com/grafana/tempo/tempodb/backend" "github.com/grafana/tempo/tempodb/encoding/common" v2 "github.com/grafana/tempo/tempodb/encoding/v2" - "github.com/grafana/tempo/tempodb/encoding/vparquet2" + "github.com/grafana/tempo/tempodb/encoding/vparquet3" ) func TestConfig_CheckConfig(t *testing.T) { @@ -79,7 +79,7 @@ func TestConfig_CheckConfig(t *testing.T) { name: "warnings for v2 settings when they drift from default", config: func() *Config { cfg := newDefaultConfig() - cfg.StorageConfig.Trace.Block.Version = vparquet2.VersionString + cfg.StorageConfig.Trace.Block.Version = vparquet3.VersionString cfg.StorageConfig.Trace.Block.IndexDownsampleBytes = 1 cfg.StorageConfig.Trace.Block.IndexPageSizeBytes = 1 cfg.Compactor.Compactor.ChunkSizeBytes = 1 diff --git a/docs/sources/tempo/configuration/_index.md b/docs/sources/tempo/configuration/_index.md index 600197d4afa..099818a7b9a 100644 --- a/docs/sources/tempo/configuration/_index.md +++ b/docs/sources/tempo/configuration/_index.md @@ -1121,7 +1121,7 @@ storage: # block configuration block: # block format version. options: v2, vParquet, vParquet2, vParquet3 - [version: | default = vParquet2] + [version: | default = vParquet3] # bloom filter false positive rate. 
lower values create larger filters but fewer false positives [bloom_filter_false_positive: | default = 0.01] diff --git a/docs/sources/tempo/configuration/manifest.md b/docs/sources/tempo/configuration/manifest.md index 453d15bc9e4..1483af9a027 100644 --- a/docs/sources/tempo/configuration/manifest.md +++ b/docs/sources/tempo/configuration/manifest.md @@ -18,7 +18,7 @@ go run ./cmd/tempo --storage.trace.backend=local --storage.trace.local.path=/tmp ## Complete configuration {{% admonition type="note" %}} -This manifest was generated on 2023-08-23. +This manifest was generated on 2023-11-13. {{% /admonition %}} ```yaml @@ -54,6 +54,7 @@ server: register_instrumentation: true graceful_shutdown_timeout: 30s http_server_read_timeout: 30s + http_server_read_header_timeout: 0s http_server_write_timeout: 30s http_server_idle_timeout: 2m0s grpc_server_max_recv_msg_size: 16777216 @@ -66,6 +67,7 @@ server: grpc_server_keepalive_timeout: 20s grpc_server_min_time_between_pings: 10s grpc_server_ping_without_stream_allowed: true + grpc_server_num_workers: 0 log_format: logfmt log_level: info log_source_ips_enabled: false @@ -105,6 +107,7 @@ internal_server: register_instrumentation: false graceful_shutdown_timeout: 30s http_server_read_timeout: 30s + http_server_read_header_timeout: 0s http_server_write_timeout: 30s http_server_idle_timeout: 2m0s grpc_server_max_recv_msg_size: 0 @@ -117,8 +120,9 @@ internal_server: grpc_server_keepalive_timeout: 0s grpc_server_min_time_between_pings: 0s grpc_server_ping_without_stream_allowed: false - log_format: "" - log_level: "" + grpc_server_num_workers: 0 + log_format: logfmt + log_level: info log_source_ips_enabled: false log_source_ips_header: "" log_source_ips_regex: "" @@ -171,7 +175,7 @@ distributor: override_ring_key: distributor forwarders: [] extend_writes: true - retry_after_on_resource_exhausted: '0' + retry_after_on_resource_exhausted: 0s ingester_client: pool_config: checkinterval: 15s @@ -511,7 +515,7 @@ metrics_generator: 
block: bloom_filter_false_positive: 0.01 bloom_filter_shard_size_bytes: 102400 - version: vParquet2 + version: vParquet3 search_encoding: snappy search_page_size_bytes: 1048576 v2_index_downsample_bytes: 1048576 @@ -543,7 +547,7 @@ metrics_generator: path: "" wal: wal_segment_size: 134217728 - wal_compression: false + wal_compression: none stripe_size: 16384 truncate_frequency: 2h0m0s min_wal_time: 300000 @@ -558,7 +562,7 @@ metrics_generator: v2_encoding: none search_encoding: none ingestion_time_range_slack: 0s - version: vParquet2 + version: vParquet3 metrics_ingestion_time_range_slack: 30s query_timeout: 30s override_ring_key: metrics-generator @@ -574,11 +578,11 @@ storage: v2_encoding: snappy search_encoding: none ingestion_time_range_slack: 2m0s - version: vParquet2 + version: vParquet3 block: bloom_filter_false_positive: 0.01 bloom_filter_shard_size_bytes: 102400 - version: vParquet2 + version: vParquet3 search_encoding: snappy search_page_size_bytes: 1048576 v2_index_downsample_bytes: 1048576 @@ -615,6 +619,7 @@ storage: insecure: false object_cache_control: "" object_metadata: {} + list_blocks_concurrency: 3 s3: tls_cert_path: "" tls_key_path: "" @@ -641,6 +646,7 @@ storage: storage_class: "" metadata: {} native_aws_auth_enabled: false + list_blocks_concurrency: 3 azure: storage_account_name: "" storage_account_key: "" @@ -694,6 +700,7 @@ overrides: insecure: false object_cache_control: "" object_metadata: {} + list_blocks_concurrency: 3 s3: tls_cert_path: "" tls_key_path: "" @@ -720,6 +727,7 @@ overrides: storage_class: "" metadata: {} native_aws_auth_enabled: false + list_blocks_concurrency: 3 azure: storage_account_name: "" storage_account_key: "" diff --git a/docs/sources/tempo/configuration/parquet.md b/docs/sources/tempo/configuration/parquet.md index 0b6e8fc2ff7..c0bb34ccf8b 100644 --- a/docs/sources/tempo/configuration/parquet.md +++ b/docs/sources/tempo/configuration/parquet.md @@ -17,34 +17,33 @@ If you install using the new Helm charts, then 
Parquet is enabled by default. ## Considerations -The Parquet block format is enabled by default in Tempo 2.0. No data conversion or upgrade process is necessary. As soon as the format is enabled, Tempo starts writing data in that format, leaving existing data as-is. +The Parquet block format has been enabled by default since Tempo 2.0. No data conversion or upgrade process is necessary. As soon as the format is enabled, Tempo starts writing data in that format, leaving existing data as-is. Block formats based on Parquet require more CPU and memory resources than the previous `v2` format but provide search and TraceQL functionality. ## Choose a different block format -The default block format is `vParquet2`, which is the latest iteration of Tempo's Parquet based columnar block format. -It is still possible to use the previous format `vParquet`. -To enable it, set the block version option to `vParquet` in the Storage section of the configuration file. - -Since v2.3, the block format `vParquet3` is available. +The default block format is `vParquet3`, which is the latest iteration of Tempo's Parquet-based columnar block format. It introduces dedicated attribute columns, which improve query performance by storing attributes in their own columns, rather than in the generic attribute key-value list. For more information, see [Dedicated attribute columns]({{< relref "../operations/tempo_cli" >}}). +You can still use the previous format `vParquet2`. +To enable it, set the block version option to `vParquet2` in the Storage section of the configuration file. + ```yaml # block format version. options: v2, vParquet, vParquet2, vParquet3 -[version: vParquet3] +[version: vParquet2] ``` -It is possible to disable Parquet and use the previous `v2` block format. This disables all forms of search, but also reduces resource consumption, and may be desired for a high-throughput cluster that does not need these capabilities. 
Set the block version option to `v2` in the Storage section of the configuration file. +In some cases, you may choose to disable Parquet and use the old `v2` block format. Using the `v2` block format disables all forms of search, but also reduces resource consumption, and may be desired for a high-throughput cluster that does not need these capabilities. To make this change, set the block version option to `v2` in the Storage section of the configuration file. ```yaml # block format version. options: v2, vParquet, vParquet2, vParquet3 [version: v2] ``` -To re-enable the default `vParquet2` format, remove the block version option from the Storage section of the configuration file or set the option to `vParquet2`. +To re-enable the default `vParquet3` format, remove the block version option from the Storage section of the configuration file or set the option to `vParquet3`. ## Parquet configuration parameters diff --git a/tempodb/encoding/versioned.go b/tempodb/encoding/versioned.go index 2ef89d16825..40236f07109 100644 --- a/tempodb/encoding/versioned.go +++ b/tempodb/encoding/versioned.go @@ -73,7 +73,7 @@ func FromVersion(v string) (VersionedEncoding, error) { // DefaultEncoding for newly written blocks. func DefaultEncoding() VersionedEncoding { - return vparquet2.Encoding{} + return vparquet3.Encoding{} } // LatestEncoding returns the most recent encoding.