From 0a09c6af2485c024db2c12209e793ecf4b908c3c Mon Sep 17 00:00:00 2001 From: Salvatore Campagna <93581129+salvatore-campagna@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:52:24 +0200 Subject: [PATCH] Include a variable to control `synthetic_source_keep` parameter (#682) This PR introduces a new track parameter, `synthetic_source_keep` which is used to control the behaviour of synthetic source for all field types. It can have values `none`, `arrays` or `all` (`all` not usable when set at index level). See https://github.com/elastic/elasticsearch/pull/112706 to understand the effect of each value. Later on we will use this to change the behaviour in our nightlies and run benchmarks on both `elastic/logs` and `elastic/security` using value `arrays`. --- elastic/logs/README.md | 1 + .../logs/templates/component/track-shared-logsdb-mode.json | 4 +++- elastic/security/README.md | 1 + .../templates/component/track-shared-logsdb-mode.json | 5 +++++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/elastic/logs/README.md b/elastic/logs/README.md index b7aac8c2..d68b52bf 100644 --- a/elastic/logs/README.md +++ b/elastic/logs/README.md @@ -220,6 +220,7 @@ The following parameters are available: * `corpora_uri_base` (default: `https://rally-tracks.elastic.co`) - Specify the base location of the datasets used by this track. * `lifecycle` (default: unset to fall back on Serverless detection) - Specifies the lifecycle management feature to use for data streams. Use `ilm` for index lifecycle management or `dlm` for data lifecycle management. By default, `dlm` will be used for benchmarking Serverless Elasticsearch. * `workflow-request-cache` (default: `true`) - Explicit control of request cache query parameter in searches executed in a workflow. This can be further overriden at an operation level with `request-cache` parameter. 
+* `synthetic_source_keep` (default: unset) - Allows overriding the default synthetic source behaviour for all field types with the following values: `none` (equivalent to unset) - no source is stored, `arrays` - source stored as is only for multi-value (array) fields. ### Data Download Parameters diff --git a/elastic/logs/templates/component/track-shared-logsdb-mode.json b/elastic/logs/templates/component/track-shared-logsdb-mode.json index a2b08886..dce96a16 100644 --- a/elastic/logs/templates/component/track-shared-logsdb-mode.json +++ b/elastic/logs/templates/component/track-shared-logsdb-mode.json @@ -4,9 +4,11 @@ {% if index_mode %} "index": { "mode": {{ index_mode | tojson }}, "mapping": { - "synthetic_source_keep": "arrays" + {% if synthetic_source_keep and synthetic_source_keep != 'none' %} + "synthetic_source_keep": "{{ synthetic_source_keep }}" + {% endif %} } } {% endif %} } diff --git a/elastic/security/README.md b/elastic/security/README.md index c1e8d3f2..63594859 100644 --- a/elastic/security/README.md +++ b/elastic/security/README.md @@ -84,6 +84,7 @@ The following parameters are available: * `wait_for_status` (default: `green`) - The track creates Data Streams prior to indexing. All created Data Streams must at least reach this status before indexing commences. Reduce to `yellow` for clusters where green isn't possible e.g. single node. * `corpora_uri_base` (default: `https://rally-tracks.elastic.co`) - Specify the base location of the datasets used by this track. * `index_mode` (default: unset) - A parameter meant to be used internally which defines one of the available indexing modes, "standard", "logsdb" or "time_series". If not set, "standard" is used. +* `synthetic_source_keep` (default: unset) - Allows overriding the default synthetic source behaviour for all field types with the following values: `none` (equivalent to unset) - no source is stored, `arrays` - source stored as is only for multi-value (array) fields. 
### Data Generation Parameters diff --git a/elastic/security/templates/component/track-shared-logsdb-mode.json b/elastic/security/templates/component/track-shared-logsdb-mode.json index 1f686086..adc476be 100644 --- a/elastic/security/templates/component/track-shared-logsdb-mode.json +++ b/elastic/security/templates/component/track-shared-logsdb-mode.json @@ -4,6 +4,11 @@ {% if index_mode %} "index": { "mode": {{ index_mode | tojson }}, + {% if synthetic_source_keep and synthetic_source_keep != 'none' %} + "mapping": { + "synthetic_source_keep": "{{ synthetic_source_keep }}" + }, + {% endif %} "sort.field": [ "host.hostname", "@timestamp" ], "sort.order": [ "asc", "desc" ], "sort.missing": ["_first", "_last"]