diff --git a/docs/changelog/118380.yaml b/docs/changelog/118380.yaml new file mode 100644 index 0000000000000..8b26c871fb172 --- /dev/null +++ b/docs/changelog/118380.yaml @@ -0,0 +1,5 @@ +pr: 118380 +summary: Restore original "is within leaf" value in `SparseVectorFieldMapper` +area: Mapping +type: bug +issues: [] diff --git a/docs/changelog/118559.yaml b/docs/changelog/118559.yaml new file mode 100644 index 0000000000000..e43de2eb23c03 --- /dev/null +++ b/docs/changelog/118559.yaml @@ -0,0 +1,11 @@ +pr: 118559 +summary: Make logsdb general available +area: Logs +type: feature +issues: [] +highlight: + title: Make logsdb general available + body: >- + Logsdb has been GA-ed. Logsdb is a feature that allows Elasticsearch to store logs more efficiently. + Logsdb allows to reduce storage usage upto ~2.5 times compared to storing logs in Elasticsearch without Logsdb. + notable: true diff --git a/docs/reference/connector/docs/connectors-salesforce.asciidoc b/docs/reference/connector/docs/connectors-salesforce.asciidoc index c640751de92c0..f5c5512ad5cc4 100644 --- a/docs/reference/connector/docs/connectors-salesforce.asciidoc +++ b/docs/reference/connector/docs/connectors-salesforce.asciidoc @@ -200,7 +200,7 @@ Once the permissions are set, assign the Profiles, Permission Set or Permission Follow these steps in Salesforce: 1. Navigate to `Administration` under the `Users` section. -2. Select `Users` and choose the user to set the permissions to. +2. Select `Users` and choose the user to set the permissions to. 3. Set the `Profile`, `Permission Set` or `Permission Set Groups` created in the earlier steps. [discrete#es-connectors-salesforce-sync-rules] @@ -249,7 +249,7 @@ Allowed values are *SOQL* and *SOSL*. [ { "query": "FIND {Salesforce} IN ALL FIELDS", - "language": "SOSL" + "language": "SOSL" } ] ---- @@ -381,7 +381,13 @@ See <> for more specifics o [discrete#es-connectors-salesforce-known-issues] ===== Known issues -There are currently no known issues for this connector. +* *DLS feature is "type-level" not "document-level"* ++ +Salesforce DLS, added in 8.13.0, does not accomodate specific access controls to specific Salesforce Objects. +Instead, if a given user/group can have access to _any_ Objects of a given type (`Case`, `Lead`, `Opportunity`, etc), that user/group will appear in the `\_allow_access_control` list for _all_ of the Objects of that type. +See https://github.com/elastic/connectors/issues/3028 for more details. ++ + Refer to <> for a list of known issues for all connectors. [discrete#es-connectors-salesforce-security] @@ -396,7 +402,7 @@ This connector is built with the {connectors-python}[Elastic connector framework View the {connectors-python}/connectors/sources/salesforce.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). -// Closing the collapsible section +// Closing the collapsible section =============== @@ -598,7 +604,7 @@ Once the permissions are set, assign the Profiles, Permission Set or Permission Follow these steps in Salesforce: 1. Navigate to `Administration` under the `Users` section. -2. Select `Users` and choose the user to set the permissions to. +2. Select `Users` and choose the user to set the permissions to. 3. Set the `Profile`, `Permission Set` or `Permission Set Groups` created in the earlier steps. [discrete#es-connectors-salesforce-client-sync-rules] @@ -648,7 +654,7 @@ Allowed values are *SOQL* and *SOSL*. [ { "query": "FIND {Salesforce} IN ALL FIELDS", - "language": "SOSL" + "language": "SOSL" } ] ---- @@ -781,7 +787,13 @@ See <> for more specifics o [discrete#es-connectors-salesforce-client-known-issues] ===== Known issues -There are currently no known issues for this connector. +* *DLS feature is "type-level" not "document-level"* ++ +Salesforce DLS, added in 8.13.0, does not accomodate specific access controls to specific Salesforce Objects. +Instead, if a given user/group can have access to _any_ Objects of a given type (`Case`, `Lead`, `Opportunity`, etc), that user/group will appear in the `\_allow_access_control` list for _all_ of the Objects of that type. +See https://github.com/elastic/connectors/issues/3028 for more details. ++ + Refer to <> for a list of known issues for all connectors. [discrete#es-connectors-salesforce-client-security] @@ -797,5 +809,5 @@ This connector is built with the {connectors-python}[Elastic connector framework View the {connectors-python}/connectors/sources/salesforce.py[source code for this connector^] (branch _{connectors-branch}_, compatible with Elastic _{minor-version}_). -// Closing the collapsible section +// Closing the collapsible section =============== diff --git a/docs/reference/data-streams/logs.asciidoc b/docs/reference/data-streams/logs.asciidoc index e870289bcf7be..3af5e09889a89 100644 --- a/docs/reference/data-streams/logs.asciidoc +++ b/docs/reference/data-streams/logs.asciidoc @@ -1,26 +1,20 @@ [[logs-data-stream]] == Logs data stream -preview::[Logs data streams and the logsdb index mode are in tech preview and may be changed or removed in the future. Don't use logs data streams or logsdb index mode in production.] +IMPORTANT: The {es} `logsdb` index mode is generally available in Elastic Cloud Hosted +and self-managed Elasticsearch as of version 8.17, and is enabled by default for +logs in https://www.elastic.co/elasticsearch/serverless[{serverless-full}]. A logs data stream is a data stream type that stores log data more efficiently. In benchmarks, log data stored in a logs data stream used ~2.5 times less disk space than a regular data -stream. The exact impact will vary depending on your data set. - -The following features are enabled in a logs data stream: - -* <>, which omits storing the `_source` field. When the document source is requested, it is synthesized from document fields upon retrieval. - -* Index sorting. This yields a lower storage footprint. By default indices are sorted by `host.name` and `@timestamp` fields at index time. - -* More space efficient compression for fields with <> enabled. +stream. The exact impact varies by data set. [discrete] [[how-to-use-logsds]] === Create a logs data stream -To create a logs data stream, set your index template `index.mode` to `logsdb`: +To create a logs data stream, set your <> `index.mode` to `logsdb`: [source,console] ---- @@ -39,10 +33,12 @@ PUT _index_template/my-index-template // TEST <1> The index mode setting. -<2> The index template priority. By default, Elasticsearch ships with an index template with a `logs-*-*` pattern with a priority of 100. You need to define a priority higher than 100 to ensure that this index template gets selected over the default index template for the `logs-*-*` pattern. See the <> for more information. +<2> The index template priority. By default, Elasticsearch ships with a `logs-*-*` index template with a priority of 100. To make sure your index template takes priority over the default `logs-*-*` template, set its `priority` to a number higher than 100. For more information, see <>. After the index template is created, new indices that use the template will be configured as a logs data stream. You can start indexing data and <>. +You can also set the index mode and adjust other template settings in <>. + //// [source,console] ---- @@ -50,3 +46,180 @@ DELETE _index_template/my-index-template ---- // TEST[continued] //// + +[[logsdb-default-settings]] + +[discrete] +[[logsdb-synthetic-source]] +=== Synthetic source + +If you have the required https://www.elastic.co/subscriptions[subscription], `logsdb` index mode uses <>, which omits storing the original `_source` +field. Instead, the document source is synthesized from doc values or stored fields upon document retrieval. + +If you don't have the required https://www.elastic.co/subscriptions[subscription], `logsdb` mode uses the original `_source` field. + +Before using synthetic source, make sure to review the <>. + +When working with multi-value fields, the `index.mapping.synthetic_source_keep` setting controls how field values +are preserved for <> reconstruction. In `logsdb`, the default value is `arrays`, +which retains both duplicate values and the order of entries. However, the exact structure of +array elements and objects is not necessarily retained. Preserving duplicates and ordering can be critical for some +log fields, such as DNS A records, HTTP headers, and log entries that represent sequential or repeated events. + +[discrete] +[[logsdb-sort-settings]] +=== Index sort settings + +In `logsdb` index mode, the following sort settings are applied by default: + +`index.sort.field`: `["host.name", "@timestamp"]`:: +Indices are sorted by `host.name` and `@timestamp` by default. The `@timestamp` field is automatically injected if it is not present. + +`index.sort.order`: `["desc", "desc"]`:: +Both `host.name` and `@timestamp` are sorted in descending (`desc`) order, prioritizing the latest data. + +`index.sort.mode`: `["min", "min"]`:: +The `min` mode sorts indices by the minimum value of multi-value fields. + +`index.sort.missing`: `["_first", "_first"]`:: +Missing values are sorted to appear `_first`. + +You can override these default sort settings. For example, to sort on different fields +and change the order, manually configure `index.sort.field` and `index.sort.order`. For more details, see +<>. + +When using the default sort settings, the `host.name` field is automatically injected into the index mappings as a `keyword` field to ensure that sorting can be applied. This guarantees that logs are efficiently sorted and retrieved based on the `host.name` and `@timestamp` fields. + +NOTE: If `subobjects` is set to `true` (default), the `host` field is mapped as an object field +named `host` with a `name` child field of type `keyword`. If `subobjects` is set to `false`, +a single `host.name` field is mapped as a `keyword` field. + +To apply different sort settings to an existing data stream, update the data stream's component templates, and then +perform or wait for a <>. + +NOTE: In `logsdb` mode, the `@timestamp` field is automatically injected if it's not already present. If you apply custom sort settings, the `@timestamp` field is injected into the mappings but is not +automatically added to the list of sort fields. + +[discrete] +[[logsdb-host-name]] +==== Existing data streams + +If you're enabling `logsdb` index mode on a data stream that already exists, make sure to check mappings and sorting. The `logsdb` mode automatically maps `host.name` as a keyword if it's included in the sort settings. If a `host.name` field already exists but has a different type, mapping errors might occur, preventing `logsdb` mode from being fully applied. + +To avoid mapping conflicts, consider these options: + +* **Adjust mappings:** Check your existing mappings to ensure that `host.name` is mapped as a keyword. + +* **Change sorting:** If needed, you can remove `host.name` from the sort settings and use a different set of fields. Sorting by `@timestamp` can be a good fallback. + +* **Switch to a different <>**: If resolving `host.name` mapping conflicts is not feasible, you can choose not to use `logsdb` mode. + +IMPORTANT: On existing data streams, `logsdb` mode is applied on <> (automatic or manual). + +[discrete] +[[logsdb-specialized-codecs]] +=== Specialized codecs + +By default, `logsdb` index mode uses the `best_compression` <>, which applies {wikipedia}/Zstd[ZSTD] +compression to stored fields. You can switch to the `default` codec for faster compression with a slightly larger storage footprint. + +The `logsdb` index mode also automatically applies specialized codecs for numeric doc values, in order to optimize storage usage. Numeric fields are +encoded using the following sequence of codecs: + +* **Delta encoding**: + Stores the difference between consecutive values instead of the actual values. + +* **Offset encoding**: + Stores the difference from a base value rather than between consecutive values. + +* **Greatest Common Divisor (GCD) encoding**: + Finds the greatest common divisor of a set of values and stores the differences as multiples of the GCD. + +* **Frame Of Reference (FOR) encoding**: + Determines the smallest number of bits required to encode a block of values and uses + bit-packing to fit such values into larger 64-bit blocks. + +Each encoding is evaluated according to heuristics determined by the data distribution. +For example, the algorithm checks whether the data is monotonically non-decreasing or +non-increasing. If so, delta encoding is applied; otherwise, the process +continues with the next encoding method (offset). + +Encoding is specific to each Lucene segment and is reapplied when segments are merged. The merged Lucene segment +might use a different encoding than the original segments, depending on the characteristics of the merged data. + +For keyword fields, **Run Length Encoding (RLE)** is applied to the ordinals, which represent positions in the Lucene +segment-level keyword dictionary. This compression is used when multiple consecutive documents share the same keyword. + +[discrete] +[[logsdb-ignored-settings]] +=== `ignore` settings + +The `logsdb` index mode uses the following `ignore` settings. You can override these settings as needed. + +[discrete] +[[logsdb-ignore-malformed]] +==== `ignore_malformed` + +By default, `logsdb` index mode sets `ignore_malformed` to `true`. With this setting, documents with malformed fields +can be indexed without causing ingestion failures. + +[discrete] +[[logs-db-ignore-above]] +==== `ignore_above` + +In `logsdb` index mode, the `index.mapping.ignore_above` setting is applied by default at the index level to ensure +efficient storage and indexing of large keyword fields.The index-level default for `ignore_above` is 8191 +_characters._ Using UTF-8 encoding, this results in a limit of 32764 bytes, depending on character encoding. + +The mapping-level `ignore_above` setting takes precedence. If a specific field has an `ignore_above` value +defined in its mapping, that value overrides the index-level `index.mapping.ignore_above` value. This default +behavior helps to optimize indexing performance by preventing excessively large string values from being indexed. + +If you need to customize the limit, you can override it at the mapping level or change the index level default. + +[discrete] +[[logs-db-ignore-limit]] +==== `ignore_dynamic_beyond_limit` + +In `logsdb` index mode, the setting `index.mapping.total_fields.ignore_dynamic_beyond_limit` is set to `true` by +default. This setting allows dynamically mapped fields to be added on top of statically defined fields, even when the total number of fields exceeds the `index.mapping.total_fields.limit`. Instead of triggering an index failure, additional dynamically mapped fields are ignored so that ingestion can continue. + +NOTE: When automatically injected, `host.name` and `@timestamp` count toward the limit of mapped fields. If `host.name` is mapped with `subobjects: true`, it has two fields. When mapped with `subobjects: false`, `host.name` has only one field. + +[discrete] +[[logsdb-nodocvalue-fields]] +=== Fields without `doc_values` + +When the `logsdb` index mode uses synthetic `_source` and `doc_values` are disabled for a field in the mapping, +{es} might set the `store` setting to `true` for that field. This ensures that the field's +data remains accessible for reconstructing the document's source when using +<>. + +For example, this adjustment occurs with text fields when `store` is `false` and no suitable multi-field is available for +reconstructing the original value. + +[discrete] +[[logsdb-settings-summary]] +=== Settings reference + +The `logsdb` index mode uses the following settings: + +* **`index.mode`**: `"logsdb"` + +* **`index.mapping.synthetic_source_keep`**: `"arrays"` + +* **`index.sort.field`**: `["host.name", "@timestamp"]` + +* **`index.sort.order`**: `["desc", "desc"]` + +* **`index.sort.mode`**: `["min", "min"]` + +* **`index.sort.missing`**: `["_first", "_first"]` + +* **`index.codec`**: `"best_compression"` + +* **`index.mapping.ignore_malformed`**: `true` + +* **`index.mapping.ignore_above`**: `8191` + +* **`index.mapping.total_fields.ignore_dynamic_beyond_limit`**: `true` diff --git a/docs/reference/data-streams/tsds.asciidoc b/docs/reference/data-streams/tsds.asciidoc index d0d6d4a455c63..1e1d56e5b4d93 100644 --- a/docs/reference/data-streams/tsds.asciidoc +++ b/docs/reference/data-streams/tsds.asciidoc @@ -17,7 +17,7 @@ metrics data. Only use a TSDS if you typically add metrics data to {es} in near real-time and `@timestamp` order. A TSDS is only intended for metrics data. For other timestamped data, such as -logs or traces, use a regular data stream. +logs or traces, use a <> or regular data stream. [discrete] [[differences-from-regular-data-stream]] diff --git a/docs/reference/images/index-mgmt/management-data-stream-fields.png b/docs/reference/images/index-mgmt/management-data-stream-fields.png new file mode 100644 index 0000000000000..605d49b80ab1f Binary files /dev/null and b/docs/reference/images/index-mgmt/management-data-stream-fields.png differ diff --git a/docs/reference/images/index-mgmt/management-data-stream.png b/docs/reference/images/index-mgmt/management-data-stream.png deleted file mode 100644 index 01534fdec2a23..0000000000000 Binary files a/docs/reference/images/index-mgmt/management-data-stream.png and /dev/null differ diff --git a/docs/reference/images/index-mgmt/management-index-templates.png b/docs/reference/images/index-mgmt/management-index-templates.png index 9188aa85e68cd..1ed004e85e71d 100644 Binary files a/docs/reference/images/index-mgmt/management-index-templates.png and b/docs/reference/images/index-mgmt/management-index-templates.png differ diff --git a/docs/reference/index-modules.asciidoc b/docs/reference/index-modules.asciidoc index 1c8f1db216b75..d9b8f8802a04b 100644 --- a/docs/reference/index-modules.asciidoc +++ b/docs/reference/index-modules.asciidoc @@ -113,10 +113,9 @@ Index mode supports the following values: `standard`::: Standard indexing with default settings. -`time_series`::: Index mode optimized for storage of metrics documented in <>. +`tsds`::: _(data streams only)_ Index mode optimized for storage of metrics. For more information, see <>. -`logsdb`::: Index mode optimized for storage of logs. It applies default sort settings on the `hostname` and `timestamp` fields and uses <>. <> on different fields is still allowed. -preview:[] +`logsdb`::: _(data streams only)_ Index mode optimized for <>. [[routing-partition-size]] `index.routing_partition_size`:: diff --git a/docs/reference/indices/index-mgmt.asciidoc b/docs/reference/indices/index-mgmt.asciidoc index 7a78f9452b85e..73643dbfd4b3b 100644 --- a/docs/reference/indices/index-mgmt.asciidoc +++ b/docs/reference/indices/index-mgmt.asciidoc @@ -67,7 +67,7 @@ This value is the time period for which your data is guaranteed to be stored. Da Elasticsearch at a later time. [role="screenshot"] -image::images/index-mgmt/management-data-stream.png[Data stream details] +image::images/index-mgmt/management-data-stream-fields.png[Data stream details] * To view more information about a data stream, such as its generation or its current index lifecycle policy, click the stream's name. From this view, you can navigate to *Discover* to diff --git a/docs/reference/indices/put-index-template.asciidoc b/docs/reference/indices/put-index-template.asciidoc index 36fc66ecb90b8..9a31037546796 100644 --- a/docs/reference/indices/put-index-template.asciidoc +++ b/docs/reference/indices/put-index-template.asciidoc @@ -115,10 +115,10 @@ See <>. `index_mode`:: (Optional, string) Type of data stream to create. Valid values are `null` -(regular data stream) and `time_series` (<>). +(standard data stream), `time_series` (<>) and `logsdb` +(<>). + -If `time_series`, each backing index has an `index.mode` index setting of -`time_series`. +The template's `index_mode` sets the `index.mode` of the backing index. ===== `index_patterns`:: diff --git a/docs/reference/inference/inference-apis.asciidoc b/docs/reference/inference/inference-apis.asciidoc index c7b779a994a05..8d5ee1b7d6ba5 100644 --- a/docs/reference/inference/inference-apis.asciidoc +++ b/docs/reference/inference/inference-apis.asciidoc @@ -48,21 +48,21 @@ When adaptive allocations are enabled: For more information about adaptive allocations and resources, refer to the {ml-docs}/ml-nlp-auto-scale.html[trained model autoscaling] documentation. -//[discrete] -//[[default-enpoints]] -//=== Default {infer} endpoints +[discrete] +[[default-enpoints]] +=== Default {infer} endpoints -//Your {es} deployment contains some preconfigured {infer} endpoints that makes it easier for you to use them when defining `semantic_text` fields or {infer} processors. -//The following list contains the default {infer} endpoints listed by `inference_id`: +Your {es} deployment contains preconfigured {infer} endpoints which makes them easier to use when defining `semantic_text` fields or using {infer} processors. +The following list contains the default {infer} endpoints listed by `inference_id`: -//* `.elser-2-elasticsearch`: uses the {ml-docs}/ml-nlp-elser.html[ELSER] built-in trained model for `sparse_embedding` tasks (recommended for English language texts) -//* `.multilingual-e5-small-elasticsearch`: uses the {ml-docs}/ml-nlp-e5.html[E5] built-in trained model for `text_embedding` tasks (recommended for non-English language texts) +* `.elser-2-elasticsearch`: uses the {ml-docs}/ml-nlp-elser.html[ELSER] built-in trained model for `sparse_embedding` tasks (recommended for English language texts) +* `.multilingual-e5-small-elasticsearch`: uses the {ml-docs}/ml-nlp-e5.html[E5] built-in trained model for `text_embedding` tasks (recommended for non-English language texts) -//Use the `inference_id` of the endpoint in a <> field definition or when creating an <>. -//The API call will automatically download and deploy the model which might take a couple of minutes. -//Default {infer} enpoints have {ml-docs}/ml-nlp-auto-scale.html#nlp-model-adaptive-allocations[adaptive allocations] enabled. -//For these models, the minimum number of allocations is `0`. -//If there is no {infer} activity that uses the endpoint, the number of allocations will scale down to `0` automatically after 15 minutes. +Use the `inference_id` of the endpoint in a <> field definition or when creating an <>. +The API call will automatically download and deploy the model which might take a couple of minutes. +Default {infer} enpoints have {ml-docs}/ml-nlp-auto-scale.html#nlp-model-adaptive-allocations[adaptive allocations] enabled. +For these models, the minimum number of allocations is `0`. +If there is no {infer} activity that uses the endpoint, the number of allocations will scale down to `0` automatically after 15 minutes. [discrete] diff --git a/docs/reference/mapping/fields/synthetic-source.asciidoc b/docs/reference/mapping/fields/synthetic-source.asciidoc index f8666e2993d6a..ddbefb73f4522 100644 --- a/docs/reference/mapping/fields/synthetic-source.asciidoc +++ b/docs/reference/mapping/fields/synthetic-source.asciidoc @@ -1,17 +1,10 @@ [[synthetic-source]] ==== Synthetic `_source` -IMPORTANT: Synthetic `_source` is Generally Available only for TSDB indices -(indices that have `index.mode` set to `time_series`). For other indices, -synthetic `_source` is in technical preview. Features in technical preview may -be changed or removed in a future release. Elastic will work to fix -any issues, but features in technical preview are not subject to the support SLA -of official GA features. - Though very handy to have around, the source field takes up a significant amount of space on disk. Instead of storing source documents on disk exactly as you send them, Elasticsearch can reconstruct source content on the fly upon retrieval. -Enable this by using the value `synthetic` for the index setting `index.mapping.source.mode`: +To enable this https://www.elastic.co/subscriptions[subscription] feature, use the value `synthetic` for the index setting `index.mapping.source.mode`: [source,console,id=enable-synthetic-source-example] ---- @@ -30,7 +23,7 @@ PUT idx ---- // TESTSETUP -While this on the fly reconstruction is *generally* slower than saving the source +While this on-the-fly reconstruction is _generally_ slower than saving the source documents verbatim and loading them at query time, it saves a lot of storage space. Additional latency can be avoided by not loading `_source` field in queries when it is not needed. diff --git a/docs/reference/quickstart/aggs-tutorial.asciidoc b/docs/reference/quickstart/aggs-tutorial.asciidoc new file mode 100644 index 0000000000000..0a8494c3eb75d --- /dev/null +++ b/docs/reference/quickstart/aggs-tutorial.asciidoc @@ -0,0 +1,2184 @@ +[[aggregations-tutorial]] +== Analyze eCommerce data with aggregations using Query DSL +++++ +Basics: Analyze eCommerce data with aggregations +++++ + +This hands-on tutorial shows you how to analyze eCommerce data using {es} <> with the `_search` API and Query DSL. + +You'll learn how to: + +* Calculate key business metrics such as average order value +* Analyze sales patterns over time +* Compare performance across product categories +* Track moving averages and cumulative totals + +[discrete] +[[aggregations-tutorial-requirements]] +=== Requirements + +You'll need: + +. A running instance of <>, either on {serverless-full} or together with {kib} on Elastic Cloud Hosted/Self Managed deployments. +** If you don't have a deployment, you can run the following command in your terminal to set up a <>: ++ +[source,sh] +---- +curl -fsSL https://elastic.co/start-local | sh +---- +// NOTCONSOLE +. The {kibana-ref}/get-started.html#gs-get-data-into-kibana[sample eCommerce data] loaded into {es}. To load sample data follow these steps in your UI: +* Open the *Integrations* pages by searching in the global search field. +* Search for `sample data` in the **Integrations** search field. +* Open the *Sample data* page. +* Select the *Other sample data sets* collapsible. +* Add the *Sample eCommerce orders* data set. +This will create and populate an index called `kibana_sample_data_ecommerce`. + +[discrete] +[[aggregations-tutorial-inspect-data]] +=== Inspect index structure + +Before we start analyzing the data, let's examine the structure of the documents in our sample eCommerce index. Run this command to see the field <>: + +[source,console] +---- +GET kibana_sample_data_ecommerce/_mapping +---- +// TEST[skip:Using Kibana sample data] + +The response shows the field mappings for the `kibana_sample_data_ecommerce` index. + +.Example response +[%collapsible] +============== +[source,console-response] +---- +{ + "kibana_sample_data_ecommerce": { + "mappings": { + "properties": { + "category": { + "type": "text", + "fields": { <1> + "keyword": { + "type": "keyword" + } + } + }, + "currency": { + "type": "keyword" + }, + "customer_birth_date": { + "type": "date" + }, + "customer_first_name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "customer_full_name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "customer_gender": { + "type": "keyword" + }, + "customer_id": { + "type": "keyword" + }, + "customer_last_name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "customer_phone": { + "type": "keyword" + }, + "day_of_week": { + "type": "keyword" + }, + "day_of_week_i": { + "type": "integer" + }, + "email": { + "type": "keyword" + }, + "event": { + "properties": { + "dataset": { + "type": "keyword" + } + } + }, + "geoip": { + "properties": { <2> + "city_name": { + "type": "keyword" + }, + "continent_name": { + "type": "keyword" + }, + "country_iso_code": { + "type": "keyword" + }, + "location": { + "type": "geo_point" <3> + }, + "region_name": { + "type": "keyword" + } + } + }, + "manufacturer": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "order_date": { + "type": "date" + }, + "order_id": { + "type": "keyword" + }, + "products": { + "properties": { <4> + "_id": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "base_price": { + "type": "half_float" + }, + "base_unit_price": { + "type": "half_float" + }, + "category": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "created_on": { + "type": "date" + }, + "discount_amount": { + "type": "half_float" + }, + "discount_percentage": { + "type": "half_float" + }, + "manufacturer": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "min_price": { + "type": "half_float" + }, + "price": { + "type": "half_float" + }, + "product_id": { + "type": "long" + }, + "product_name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + }, + "analyzer": "english" + }, + "quantity": { + "type": "integer" + }, + "sku": { + "type": "keyword" + }, + "tax_amount": { + "type": "half_float" + }, + "taxful_price": { + "type": "half_float" + }, + "taxless_price": { + "type": "half_float" + }, + "unit_discount_amount": { + "type": "half_float" + } + } + }, + "sku": { + "type": "keyword" + }, + "taxful_total_price": { + "type": "half_float" + }, + "taxless_total_price": { + "type": "half_float" + }, + "total_quantity": { + "type": "integer" + }, + "total_unique_products": { + "type": "integer" + }, + "type": { + "type": "keyword" + }, + "user": { + "type": "keyword" + } + } + } + } +} +---- +<1> `fields`: Multi-field mapping that allows both full text and exact matching +<2> `geoip.properties`: Object type field containing location-related properties +<3> `geoip.location`: Geographic coordinates stored as geo_point for location-based queries +<4> `products.properties`: Nested structure containing details about items in each order +============== + +The sample data includes the following <>: + +* <> and <> for text fields +** Most `text` fields have a `.keyword` subfield for exact matching using <> +* <> for date fields +* 3 <> types: +** `integer` for whole numbers +** `long` for large whole numbers +** `half_float` for floating-point numbers +* <> for geographic coordinates +* <> for nested structures such as `products`, `geoip`, `event` + +Now that we understand the structure of our sample data, let's start analyzing it. + +[discrete] +[[aggregations-tutorial-basic-metrics]] +=== Get key business metrics + +Let's start by calculating important metrics about orders and customers. + +[discrete] +[[aggregations-tutorial-order-value]] +==== Get average order size + +Calculate the average order value across all orders in the dataset using the <> aggregation. + +[source,console] +---- +GET kibana_sample_data_ecommerce/_search +{ + "size": 0, <1> + "aggs": { + "avg_order_value": { <2> + "avg": { <3> + "field": "taxful_total_price" + } + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> Set `size` to 0 to avoid returning matched documents in the response and return only the aggregation results +<2> A meaningful name that describes what this metric represents +<3> Configures an `avg` aggregation, which calculates a simple arithmetic mean + +.Example response +[%collapsible] +============== +[source,console-result] +---- +{ + "took": 0, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4675, <1> + "relation": "eq" + }, + "max_score": null, + "hits": [] <2> + }, + "aggregations": { + "avg_order_value": { <3> + "value": 75.05542864304813 <4> + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> Total number of orders in the dataset +<2> `hits` is empty because we set `size` to 0 +<3> Results appear under the name we specified in the request +<4> The average order value is calculated dynamically from all the orders in the dataset +============== + +[discrete] +[[aggregations-tutorial-order-stats]] +==== Get multiple order statistics at once + +Calculate multiple statistics about orders in one request using the <> aggregation. + +[source,console] +---- +GET kibana_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "order_stats": { <1> + "stats": { <2> + "field": "taxful_total_price" + } + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> A descriptive name for this set of statistics +<2> `stats` returns count, min, max, avg, and sum at once + +.Example response +[%collapsible] +============== +[source,console-result] +---- +{ + "aggregations": { + "order_stats": { + "count": 4675, <1> + "min": 6.98828125, <2> + "max": 2250, <3> + "avg": 75.05542864304813, <4> + "sum": 350884.12890625 <5> + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> `"count"`: Total number of orders in the dataset +<2> `"min"`: Lowest individual order value in the dataset +<3> `"max"`: Highest individual order value in the dataset +<4> `"avg"`: Average value per order across all orders +<5> `"sum"`: Total revenue from all orders combined +============== + +[TIP] +==== +The <> is more efficient than running individual min, max, avg, and sum aggregations. +==== + +[discrete] +[[aggregations-tutorial-sales-patterns]] +=== Analyze sales patterns + +Let's group orders in different ways to understand sales patterns. + +[discrete] +[[aggregations-tutorial-category-breakdown]] +==== Break down sales by category + +Group orders by category to see which product categories are most popular, using the <> aggregation. + +[source,console] +---- +GET kibana_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "sales_by_category": { <1> + "terms": { <2> + "field": "category.keyword", <3> + "size": 5, <4> + "order": { "_count": "desc" } <5> + } + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> Name reflecting the business purpose of this breakdown +<2> `terms` aggregation groups documents by field values +<3> Use <> field for exact matching on text fields +<4> Limit to top 5 categories +<5> Order by number of orders (descending) + +.Example response +[%collapsible] +============== +[source,console-result] +---- +{ + "took": 4, + "timed_out": false, + "_shards": { + "total": 5, + "successful": 5, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4675, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "sales_by_category": { + "doc_count_error_upper_bound": 0, <1> + "sum_other_doc_count": 572, <2> + "buckets": [ <3> + { + "key": "Men's Clothing", <4> + "doc_count": 2024 <5> + }, + { + "key": "Women's Clothing", + "doc_count": 1903 + }, + { + "key": "Women's Shoes", + "doc_count": 1136 + }, + { + "key": "Men's Shoes", + "doc_count": 944 + }, + { + "key": "Women's Accessories", + "doc_count": 830 + } + ] + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> Due to Elasticsearch's distributed architecture, when <> run across multiple shards, the doc counts may have a small margin of error. This value indicates the maximum possible error in the counts. +<2> Count of documents in categories beyond the requested size. +<3> Array of category buckets, ordered by count. +<4> Category name. +<5> Number of orders in this category. +============== + +[discrete] +[[aggregations-tutorial-daily-sales]] +==== Track daily sales patterns + +Group orders by day to track daily sales patterns using the <> aggregation. + +[source,console] +---- +GET kibana_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "daily_orders": { <1> + "date_histogram": { <2> + "field": "order_date", + "calendar_interval": "day", <3> + "format": "yyyy-MM-dd", <4> + "min_doc_count": 0 <5> + } + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> Descriptive name for the time-series aggregation results. +<2> The `date_histogram` aggregration groups documents into time-based buckets, similar to terms aggregation but for dates. +<3> Uses <> to handle months with different lengths. `"day"` ensures consistent daily grouping regardless of timezone. +<4> Formats dates in response using <> (e.g. "yyyy-MM-dd"). Refer to <> for additional options. +<5> When `min_doc_count` is 0, returns buckets for days with no orders, useful for continuous time series visualization. + +.Example response +[%collapsible] +============== +[source,console-result] +---- +{ + "took": 2, + "timed_out": false, + "_shards": { + "total": 5, + "successful": 5, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4675, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "daily_orders": { <1> + "buckets": [ <2> + { + "key_as_string": "2024-11-28", <3> + "key": 1732752000000, <4> + "doc_count": 146 <5> + }, + { + "key_as_string": "2024-11-29", + "key": 1732838400000, + "doc_count": 153 + }, + { + "key_as_string": "2024-11-30", + "key": 1732924800000, + "doc_count": 143 + }, + { + "key_as_string": "2024-12-01", + "key": 1733011200000, + "doc_count": 140 + }, + { + "key_as_string": "2024-12-02", + "key": 1733097600000, + "doc_count": 139 + }, + { + "key_as_string": "2024-12-03", + "key": 1733184000000, + "doc_count": 157 + }, + { + "key_as_string": "2024-12-04", + "key": 1733270400000, + "doc_count": 145 + }, + { + "key_as_string": "2024-12-05", + "key": 1733356800000, + "doc_count": 152 + }, + { + "key_as_string": "2024-12-06", + "key": 1733443200000, + "doc_count": 163 + }, + { + "key_as_string": "2024-12-07", + "key": 1733529600000, + "doc_count": 141 + }, + { + "key_as_string": "2024-12-08", + "key": 1733616000000, + "doc_count": 151 + }, + { + "key_as_string": "2024-12-09", + "key": 1733702400000, + "doc_count": 143 + }, + { + "key_as_string": "2024-12-10", + "key": 1733788800000, + "doc_count": 143 + }, + { + "key_as_string": "2024-12-11", + "key": 1733875200000, + "doc_count": 142 + }, + { + "key_as_string": "2024-12-12", + "key": 1733961600000, + "doc_count": 161 + }, + { + "key_as_string": "2024-12-13", + "key": 1734048000000, + "doc_count": 144 + }, + { + "key_as_string": "2024-12-14", + "key": 1734134400000, + "doc_count": 157 + }, + { + "key_as_string": "2024-12-15", + "key": 1734220800000, + "doc_count": 158 + }, + { + "key_as_string": "2024-12-16", + "key": 1734307200000, + "doc_count": 144 + }, + { + "key_as_string": "2024-12-17", + "key": 1734393600000, + "doc_count": 151 + }, + { + "key_as_string": "2024-12-18", + "key": 1734480000000, + "doc_count": 145 + }, + { + "key_as_string": "2024-12-19", + "key": 1734566400000, + "doc_count": 157 + }, + { + "key_as_string": "2024-12-20", + "key": 1734652800000, + "doc_count": 158 + }, + { + "key_as_string": "2024-12-21", + "key": 1734739200000, + "doc_count": 153 + }, + { + "key_as_string": "2024-12-22", + "key": 1734825600000, + "doc_count": 165 + }, + { + "key_as_string": "2024-12-23", + "key": 1734912000000, + "doc_count": 153 + }, + { + "key_as_string": "2024-12-24", + "key": 1734998400000, + "doc_count": 158 + }, + { + "key_as_string": "2024-12-25", + "key": 1735084800000, + "doc_count": 160 + }, + { + "key_as_string": "2024-12-26", + "key": 1735171200000, + "doc_count": 159 + }, + { + "key_as_string": "2024-12-27", + "key": 1735257600000, + "doc_count": 152 + }, + { + "key_as_string": "2024-12-28", + "key": 1735344000000, + "doc_count": 142 + } + ] + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> Results of our named aggregation "daily_orders" +<2> Time-based buckets from date_histogram aggregation +<3> `key_as_string` is the human-readable date for this bucket +<4> `key` is the same date represented as the Unix timestamp for this bucket +<5> `doc_count` counts the number of documents that fall into this time bucket +============== + +[discrete] +[[aggregations-tutorial-combined-analysis]] +=== Combine metrics with groupings + +Now let's calculate <> within each group to get deeper insights. + +[discrete] +[[aggregations-tutorial-category-metrics]] +==== Compare category performance + +Calculate metrics within each category to compare performance across categories. + +[source,console] +---- +GET kibana_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "categories": { + "terms": { + "field": "category.keyword", + "size": 5, + "order": { "total_revenue": "desc" } <1> + }, + "aggs": { <2> + "total_revenue": { <3> + "sum": { + "field": "taxful_total_price" + } + }, + "avg_order_value": { <4> + "avg": { + "field": "taxful_total_price" + } + }, + "total_items": { <5> + "sum": { + "field": "total_quantity" + } + } + } + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> Order categories by their total revenue instead of count +<2> Define metrics to calculate within each category +<3> Total revenue for the category +<4> Average order value in the category +<5> Total number of items sold + +.Example response +[%collapsible] +============== +[source,console-result] +---- +{ + "aggregations": { + "categories": { + "buckets": [ + { + "key": "Men's Clothing", <1> + "doc_count": 2179, <2> + "total_revenue": { <3> + "value": 156729.453125 + }, + "avg_order_value": { <4> + "value": 71.92726898715927 + }, + "total_items": { <5> + "value": 8716 + } + }, + { + "key": "Women's Clothing", + "doc_count": 2262, + ... + } + ] + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> Category name +<2> Number of orders +<3> Total revenue for this category +<4> Average order value for this category +<5> Total quantity of items sold +============== + +[discrete] +[[aggregations-tutorial-daily-metrics]] +==== Analyze daily sales performance + +Let's combine metrics to track daily trends: daily revenue, unique customers, and average basket size. + +[source,console] +---- +GET kibana_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "daily_sales": { + "date_histogram": { + "field": "order_date", + "calendar_interval": "day", + "format": "yyyy-MM-dd" + }, + "aggs": { + "revenue": { <1> + "sum": { + "field": "taxful_total_price" + } + }, + "unique_customers": { <2> + "cardinality": { + "field": "customer_id" + } + }, + "avg_basket_size": { <3> + "avg": { + "field": "total_quantity" + } + } + } + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> Daily revenue +<2> Uses the <> aggregation to count unique customers per day +<3> Average number of items per order + +.Example response +[%collapsible] +============== +[source,console-result] +---- +{ + "took": 119, + "timed_out": false, + "_shards": { + "total": 5, + "successful": 5, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4675, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "daily_sales": { + "buckets": [ + { + "key_as_string": "2024-11-14", + "key": 1731542400000, + "doc_count": 146, + "unique_customers": { <1> + "value": 42 + }, + "revenue": { <2> + "value": 10578.53125 + }, + "avg_basket_size": { <3> + "value": 2.1780821917808217 + } + }, + { + "key_as_string": "2024-11-15", + "key": 1731628800000, + "doc_count": 153, + "unique_customers": { + "value": 44 + }, + "revenue": { + "value": 10448 + }, + "avg_basket_size": { + "value": 2.183006535947712 + } + }, + { + "key_as_string": "2024-11-16", + "key": 1731715200000, + "doc_count": 143, + "unique_customers": { + "value": 45 + }, + "revenue": { + "value": 10283.484375 + }, + "avg_basket_size": { + "value": 2.111888111888112 + } + }, + { + "key_as_string": "2024-11-17", + "key": 1731801600000, + "doc_count": 140, + "unique_customers": { + "value": 42 + }, + "revenue": { + "value": 10145.5234375 + }, + "avg_basket_size": { + "value": 2.142857142857143 + } + }, + { + "key_as_string": "2024-11-18", + "key": 1731888000000, + "doc_count": 139, + "unique_customers": { + "value": 42 + }, + "revenue": { + "value": 12012.609375 + }, + "avg_basket_size": { + "value": 2.158273381294964 + } + }, + { + "key_as_string": "2024-11-19", + "key": 1731974400000, + "doc_count": 157, + "unique_customers": { + "value": 43 + }, + "revenue": { + "value": 11009.45703125 + }, + "avg_basket_size": { + "value": 2.0955414012738856 + } + }, + { + "key_as_string": "2024-11-20", + "key": 1732060800000, + "doc_count": 145, + "unique_customers": { + "value": 44 + }, + "revenue": { + "value": 10720.59375 + }, + "avg_basket_size": { + "value": 2.179310344827586 + } + }, + { + "key_as_string": "2024-11-21", + "key": 1732147200000, + "doc_count": 152, + "unique_customers": { + "value": 43 + }, + "revenue": { + "value": 11185.3671875 + }, + "avg_basket_size": { + "value": 2.1710526315789473 + } + }, + { + "key_as_string": "2024-11-22", + "key": 1732233600000, + "doc_count": 163, + "unique_customers": { + "value": 44 + }, + "revenue": { + "value": 13560.140625 + }, + "avg_basket_size": { + "value": 2.2576687116564416 + } + }, + { + "key_as_string": "2024-11-23", + "key": 1732320000000, + "doc_count": 141, + "unique_customers": { + "value": 45 + }, + "revenue": { + "value": 9884.78125 + }, + "avg_basket_size": { + "value": 2.099290780141844 + } + }, + { + "key_as_string": "2024-11-24", + "key": 1732406400000, + "doc_count": 151, + "unique_customers": { + "value": 44 + }, + "revenue": { + "value": 11075.65625 + }, + "avg_basket_size": { + "value": 2.0927152317880795 + } + }, + { + "key_as_string": "2024-11-25", + "key": 1732492800000, + "doc_count": 143, + "unique_customers": { + "value": 41 + }, + "revenue": { + "value": 10323.8515625 + }, + "avg_basket_size": { + "value": 2.167832167832168 + } + }, + { + "key_as_string": "2024-11-26", + "key": 1732579200000, + "doc_count": 143, + "unique_customers": { + "value": 44 + }, + "revenue": { + "value": 10369.546875 + }, + "avg_basket_size": { + "value": 2.167832167832168 + } + }, + { + "key_as_string": "2024-11-27", + "key": 1732665600000, + "doc_count": 142, + "unique_customers": { + "value": 46 + }, + "revenue": { + "value": 11711.890625 + }, + "avg_basket_size": { + "value": 2.1971830985915495 + } + }, + { + "key_as_string": "2024-11-28", + "key": 1732752000000, + "doc_count": 161, + "unique_customers": { + "value": 43 + }, + "revenue": { + "value": 12612.6640625 + }, + "avg_basket_size": { + "value": 2.1180124223602483 + } + }, + { + "key_as_string": "2024-11-29", + "key": 1732838400000, + "doc_count": 144, + "unique_customers": { + "value": 42 + }, + "revenue": { + "value": 10176.87890625 + }, + "avg_basket_size": { + "value": 2.0347222222222223 + } + }, + { + "key_as_string": "2024-11-30", + "key": 1732924800000, + "doc_count": 157, + "unique_customers": { + "value": 43 + }, + "revenue": { + "value": 11480.33203125 + }, + "avg_basket_size": { + "value": 2.159235668789809 + } + }, + { + "key_as_string": "2024-12-01", + "key": 1733011200000, + "doc_count": 158, + "unique_customers": { + "value": 42 + }, + "revenue": { + "value": 11533.265625 + }, + "avg_basket_size": { + "value": 2.0822784810126582 + } + }, + { + "key_as_string": "2024-12-02", + "key": 1733097600000, + "doc_count": 144, + "unique_customers": { + "value": 43 + }, + "revenue": { + "value": 10499.8125 + }, + "avg_basket_size": { + "value": 2.201388888888889 + } + }, + { + "key_as_string": "2024-12-03", + "key": 1733184000000, + "doc_count": 151, + "unique_customers": { + "value": 40 + }, + "revenue": { + "value": 12111.6875 + }, + "avg_basket_size": { + "value": 2.172185430463576 + } + }, + { + "key_as_string": "2024-12-04", + "key": 1733270400000, + "doc_count": 145, + "unique_customers": { + "value": 40 + }, + "revenue": { + "value": 10530.765625 + }, + "avg_basket_size": { + "value": 2.0965517241379312 + } + }, + { + "key_as_string": "2024-12-05", + "key": 1733356800000, + "doc_count": 157, + "unique_customers": { + "value": 43 + }, + "revenue": { + "value": 11872.5625 + }, + "avg_basket_size": { + "value": 2.1464968152866244 + } + }, + { + "key_as_string": "2024-12-06", + "key": 1733443200000, + "doc_count": 158, + "unique_customers": { + "value": 42 + }, + "revenue": { + "value": 12109.453125 + }, + "avg_basket_size": { + "value": 2.151898734177215 + } + }, + { + "key_as_string": "2024-12-07", + "key": 1733529600000, + "doc_count": 153, + "unique_customers": { + "value": 42 + }, + "revenue": { + "value": 11057.40625 + }, + "avg_basket_size": { + "value": 2.111111111111111 + } + }, + { + "key_as_string": "2024-12-08", + "key": 1733616000000, + "doc_count": 165, + "unique_customers": { + "value": 42 + }, + "revenue": { + "value": 13095.609375 + }, + "avg_basket_size": { + "value": 2.1818181818181817 + } + }, + { + "key_as_string": "2024-12-09", + "key": 1733702400000, + "doc_count": 153, + "unique_customers": { + "value": 41 + }, + "revenue": { + "value": 12574.015625 + }, + "avg_basket_size": { + "value": 2.2287581699346406 + } + }, + { + "key_as_string": "2024-12-10", + "key": 1733788800000, + "doc_count": 158, + "unique_customers": { + "value": 42 + }, + "revenue": { + "value": 11188.1875 + }, + "avg_basket_size": { + "value": 2.151898734177215 + } + }, + { + "key_as_string": "2024-12-11", + "key": 1733875200000, + "doc_count": 160, + "unique_customers": { + "value": 42 + }, + "revenue": { + "value": 12117.65625 + }, + "avg_basket_size": { + "value": 2.20625 + } + }, + { + "key_as_string": "2024-12-12", + "key": 1733961600000, + "doc_count": 159, + "unique_customers": { + "value": 45 + }, + "revenue": { + "value": 11558.25 + }, + "avg_basket_size": { + "value": 2.1823899371069184 + } + }, + { + "key_as_string": "2024-12-13", + "key": 1734048000000, + "doc_count": 152, + "unique_customers": { + "value": 45 + }, + "revenue": { + "value": 11921.1171875 + }, + "avg_basket_size": { + "value": 2.289473684210526 + } + }, + { + "key_as_string": "2024-12-14", + "key": 1734134400000, + "doc_count": 142, + "unique_customers": { + "value": 45 + }, + "revenue": { + "value": 11135.03125 + }, + "avg_basket_size": { + "value": 2.183098591549296 + } + } + ] + } + } +} +---- +// TEST[skip:Using Kibana sample data] +============== + +[discrete] +[[aggregations-tutorial-trends]] +=== Track trends and patterns + +You can use <> on the results of other aggregations. +Let's analyze how metrics change over time. + +[discrete] +[[aggregations-tutorial-moving-average]] +==== Smooth out daily fluctuations + +Moving averages help identify trends by reducing day-to-day noise in the data. +Let's observe sales trends more clearly by smoothing daily revenue variations, using the <> aggregation. + +[source,console] +---- +GET kibana_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "daily_sales": { + "date_histogram": { + "field": "order_date", + "calendar_interval": "day" + }, + "aggs": { + "daily_revenue": { <1> + "sum": { + "field": "taxful_total_price" + } + }, + "smoothed_revenue": { <2> + "moving_fn": { <3> + "buckets_path": "daily_revenue", <4> + "window": 3, <5> + "script": "MovingFunctions.unweightedAvg(values)" <6> + } + } + } + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> Calculate daily revenue first. +<2> Create a smoothed version of the daily revenue. +<3> Use `moving_fn` for moving window calculations. +<4> Reference the revenue from our date histogram. +<5> Use a 3-day window — use different window sizes to see trends at different time scales. +<6> Use the built-in unweighted average function in the `moving_fn` aggregation. + +.Example response +[%collapsible] +============== +[source,console-result] +---- +{ + "took": 13, + "timed_out": false, + "_shards": { + "total": 5, + "successful": 5, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4675, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "daily_sales": { + "buckets": [ + { + "key_as_string": "2024-11-14T00:00:00.000Z", <1> + "key": 1731542400000, + "doc_count": 146, <2> + "daily_revenue": { <3> + "value": 10578.53125 + }, + "smoothed_revenue": { <4> + "value": null + } + }, + { + "key_as_string": "2024-11-15T00:00:00.000Z", + "key": 1731628800000, + "doc_count": 153, + "daily_revenue": { + "value": 10448 + }, + "smoothed_revenue": { <5> + "value": 10578.53125 + } + }, + { + "key_as_string": "2024-11-16T00:00:00.000Z", + "key": 1731715200000, + "doc_count": 143, + "daily_revenue": { + "value": 10283.484375 + }, + "smoothed_revenue": { + "value": 10513.265625 + } + }, + { + "key_as_string": "2024-11-17T00:00:00.000Z", + "key": 1731801600000, + "doc_count": 140, + "daily_revenue": { + "value": 10145.5234375 + }, + "smoothed_revenue": { + "value": 10436.671875 + } + }, + { + "key_as_string": "2024-11-18T00:00:00.000Z", + "key": 1731888000000, + "doc_count": 139, + "daily_revenue": { + "value": 12012.609375 + }, + "smoothed_revenue": { + "value": 10292.3359375 + } + }, + { + "key_as_string": "2024-11-19T00:00:00.000Z", + "key": 1731974400000, + "doc_count": 157, + "daily_revenue": { + "value": 11009.45703125 + }, + "smoothed_revenue": { + "value": 10813.872395833334 + } + }, + { + "key_as_string": "2024-11-20T00:00:00.000Z", + "key": 1732060800000, + "doc_count": 145, + "daily_revenue": { + "value": 10720.59375 + }, + "smoothed_revenue": { + "value": 11055.86328125 + } + }, + { + "key_as_string": "2024-11-21T00:00:00.000Z", + "key": 1732147200000, + "doc_count": 152, + "daily_revenue": { + "value": 11185.3671875 + }, + "smoothed_revenue": { + "value": 11247.553385416666 + } + }, + { + "key_as_string": "2024-11-22T00:00:00.000Z", + "key": 1732233600000, + "doc_count": 163, + "daily_revenue": { + "value": 13560.140625 + }, + "smoothed_revenue": { + "value": 10971.805989583334 + } + }, + { + "key_as_string": "2024-11-23T00:00:00.000Z", + "key": 1732320000000, + "doc_count": 141, + "daily_revenue": { + "value": 9884.78125 + }, + "smoothed_revenue": { + "value": 11822.033854166666 + } + }, + { + "key_as_string": "2024-11-24T00:00:00.000Z", + "key": 1732406400000, + "doc_count": 151, + "daily_revenue": { + "value": 11075.65625 + }, + "smoothed_revenue": { + "value": 11543.4296875 + } + }, + { + "key_as_string": "2024-11-25T00:00:00.000Z", + "key": 1732492800000, + "doc_count": 143, + "daily_revenue": { + "value": 10323.8515625 + }, + "smoothed_revenue": { + "value": 11506.859375 + } + }, + { + "key_as_string": "2024-11-26T00:00:00.000Z", + "key": 1732579200000, + "doc_count": 143, + "daily_revenue": { + "value": 10369.546875 + }, + "smoothed_revenue": { + "value": 10428.096354166666 + } + }, + { + "key_as_string": "2024-11-27T00:00:00.000Z", + "key": 1732665600000, + "doc_count": 142, + "daily_revenue": { + "value": 11711.890625 + }, + "smoothed_revenue": { + "value": 10589.684895833334 + } + }, + { + "key_as_string": "2024-11-28T00:00:00.000Z", + "key": 1732752000000, + "doc_count": 161, + "daily_revenue": { + "value": 12612.6640625 + }, + "smoothed_revenue": { + "value": 10801.763020833334 + } + }, + { + "key_as_string": "2024-11-29T00:00:00.000Z", + "key": 1732838400000, + "doc_count": 144, + "daily_revenue": { + "value": 10176.87890625 + }, + "smoothed_revenue": { + "value": 11564.700520833334 + } + }, + { + "key_as_string": "2024-11-30T00:00:00.000Z", + "key": 1732924800000, + "doc_count": 157, + "daily_revenue": { + "value": 11480.33203125 + }, + "smoothed_revenue": { + "value": 11500.477864583334 + } + }, + { + "key_as_string": "2024-12-01T00:00:00.000Z", + "key": 1733011200000, + "doc_count": 158, + "daily_revenue": { + "value": 11533.265625 + }, + "smoothed_revenue": { + "value": 11423.291666666666 + } + }, + { + "key_as_string": "2024-12-02T00:00:00.000Z", + "key": 1733097600000, + "doc_count": 144, + "daily_revenue": { + "value": 10499.8125 + }, + "smoothed_revenue": { + "value": 11063.4921875 + } + }, + { + "key_as_string": "2024-12-03T00:00:00.000Z", + "key": 1733184000000, + "doc_count": 151, + "daily_revenue": { + "value": 12111.6875 + }, + "smoothed_revenue": { + "value": 11171.13671875 + } + }, + { + "key_as_string": "2024-12-04T00:00:00.000Z", + "key": 1733270400000, + "doc_count": 145, + "daily_revenue": { + "value": 10530.765625 + }, + "smoothed_revenue": { + "value": 11381.588541666666 + } + }, + { + "key_as_string": "2024-12-05T00:00:00.000Z", + "key": 1733356800000, + "doc_count": 157, + "daily_revenue": { + "value": 11872.5625 + }, + "smoothed_revenue": { + "value": 11047.421875 + } + }, + { + "key_as_string": "2024-12-06T00:00:00.000Z", + "key": 1733443200000, + "doc_count": 158, + "daily_revenue": { + "value": 12109.453125 + }, + "smoothed_revenue": { + "value": 11505.005208333334 + } + }, + { + "key_as_string": "2024-12-07T00:00:00.000Z", + "key": 1733529600000, + "doc_count": 153, + "daily_revenue": { + "value": 11057.40625 + }, + "smoothed_revenue": { + "value": 11504.260416666666 + } + }, + { + "key_as_string": "2024-12-08T00:00:00.000Z", + "key": 1733616000000, + "doc_count": 165, + "daily_revenue": { + "value": 13095.609375 + }, + "smoothed_revenue": { + "value": 11679.807291666666 + } + }, + { + "key_as_string": "2024-12-09T00:00:00.000Z", + "key": 1733702400000, + "doc_count": 153, + "daily_revenue": { + "value": 12574.015625 + }, + "smoothed_revenue": { + "value": 12087.489583333334 + } + }, + { + "key_as_string": "2024-12-10T00:00:00.000Z", + "key": 1733788800000, + "doc_count": 158, + "daily_revenue": { + "value": 11188.1875 + }, + "smoothed_revenue": { + "value": 12242.34375 + } + }, + { + "key_as_string": "2024-12-11T00:00:00.000Z", + "key": 1733875200000, + "doc_count": 160, + "daily_revenue": { + "value": 12117.65625 + }, + "smoothed_revenue": { + "value": 12285.9375 + } + }, + { + "key_as_string": "2024-12-12T00:00:00.000Z", + "key": 1733961600000, + "doc_count": 159, + "daily_revenue": { + "value": 11558.25 + }, + "smoothed_revenue": { + "value": 11959.953125 + } + }, + { + "key_as_string": "2024-12-13T00:00:00.000Z", + "key": 1734048000000, + "doc_count": 152, + "daily_revenue": { + "value": 11921.1171875 + }, + "smoothed_revenue": { + "value": 11621.364583333334 + } + }, + { + "key_as_string": "2024-12-14T00:00:00.000Z", + "key": 1734134400000, + "doc_count": 142, + "daily_revenue": { + "value": 11135.03125 + }, + "smoothed_revenue": { + "value": 11865.674479166666 + } + } + ] + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> Date of the bucket is in default ISO format because we didn't specify a format +<2> Number of orders for this day +<3> Raw daily revenue before smoothing +<4> First day has no smoothed value as it needs previous days for the calculation +<5> Moving average starts from second day, using a 3-day window +============== + +[TIP] +==== +Notice how the smoothed values lag behind the actual values - this is because they need previous days' data to calculate. The first day will always be null when using moving averages. +==== + +[discrete] +[[aggregations-tutorial-cumulative]] +==== Track running totals + +Track running totals over time using the <> aggregation. + +[source,console] +---- +GET kibana_sample_data_ecommerce/_search +{ + "size": 0, + "aggs": { + "daily_sales": { + "date_histogram": { + "field": "order_date", + "calendar_interval": "day" + }, + "aggs": { + "revenue": { + "sum": { + "field": "taxful_total_price" + } + }, + "cumulative_revenue": { <1> + "cumulative_sum": { <2> + "buckets_path": "revenue" <3> + } + } + } + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> Name for our running total +<2> `cumulative_sum` adds up values across buckets +<3> Reference the revenue we want to accumulate + +.Example response +[%collapsible] +============== +[source,console-result] +---- +{ + "took": 4, + "timed_out": false, + "_shards": { + "total": 5, + "successful": 5, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4675, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "daily_sales": { <1> + "buckets": [ <2> + { + "key_as_string": "2024-11-14T00:00:00.000Z", <3> + "key": 1731542400000, + "doc_count": 146, + "revenue": { <4> + "value": 10578.53125 + }, + "cumulative_revenue": { <5> + "value": 10578.53125 + } + }, + { + "key_as_string": "2024-11-15T00:00:00.000Z", + "key": 1731628800000, + "doc_count": 153, + "revenue": { + "value": 10448 + }, + "cumulative_revenue": { + "value": 21026.53125 + } + }, + { + "key_as_string": "2024-11-16T00:00:00.000Z", + "key": 1731715200000, + "doc_count": 143, + "revenue": { + "value": 10283.484375 + }, + "cumulative_revenue": { + "value": 31310.015625 + } + }, + { + "key_as_string": "2024-11-17T00:00:00.000Z", + "key": 1731801600000, + "doc_count": 140, + "revenue": { + "value": 10145.5234375 + }, + "cumulative_revenue": { + "value": 41455.5390625 + } + }, + { + "key_as_string": "2024-11-18T00:00:00.000Z", + "key": 1731888000000, + "doc_count": 139, + "revenue": { + "value": 12012.609375 + }, + "cumulative_revenue": { + "value": 53468.1484375 + } + }, + { + "key_as_string": "2024-11-19T00:00:00.000Z", + "key": 1731974400000, + "doc_count": 157, + "revenue": { + "value": 11009.45703125 + }, + "cumulative_revenue": { + "value": 64477.60546875 + } + }, + { + "key_as_string": "2024-11-20T00:00:00.000Z", + "key": 1732060800000, + "doc_count": 145, + "revenue": { + "value": 10720.59375 + }, + "cumulative_revenue": { + "value": 75198.19921875 + } + }, + { + "key_as_string": "2024-11-21T00:00:00.000Z", + "key": 1732147200000, + "doc_count": 152, + "revenue": { + "value": 11185.3671875 + }, + "cumulative_revenue": { + "value": 86383.56640625 + } + }, + { + "key_as_string": "2024-11-22T00:00:00.000Z", + "key": 1732233600000, + "doc_count": 163, + "revenue": { + "value": 13560.140625 + }, + "cumulative_revenue": { + "value": 99943.70703125 + } + }, + { + "key_as_string": "2024-11-23T00:00:00.000Z", + "key": 1732320000000, + "doc_count": 141, + "revenue": { + "value": 9884.78125 + }, + "cumulative_revenue": { + "value": 109828.48828125 + } + }, + { + "key_as_string": "2024-11-24T00:00:00.000Z", + "key": 1732406400000, + "doc_count": 151, + "revenue": { + "value": 11075.65625 + }, + "cumulative_revenue": { + "value": 120904.14453125 + } + }, + { + "key_as_string": "2024-11-25T00:00:00.000Z", + "key": 1732492800000, + "doc_count": 143, + "revenue": { + "value": 10323.8515625 + }, + "cumulative_revenue": { + "value": 131227.99609375 + } + }, + { + "key_as_string": "2024-11-26T00:00:00.000Z", + "key": 1732579200000, + "doc_count": 143, + "revenue": { + "value": 10369.546875 + }, + "cumulative_revenue": { + "value": 141597.54296875 + } + }, + { + "key_as_string": "2024-11-27T00:00:00.000Z", + "key": 1732665600000, + "doc_count": 142, + "revenue": { + "value": 11711.890625 + }, + "cumulative_revenue": { + "value": 153309.43359375 + } + }, + { + "key_as_string": "2024-11-28T00:00:00.000Z", + "key": 1732752000000, + "doc_count": 161, + "revenue": { + "value": 12612.6640625 + }, + "cumulative_revenue": { + "value": 165922.09765625 + } + }, + { + "key_as_string": "2024-11-29T00:00:00.000Z", + "key": 1732838400000, + "doc_count": 144, + "revenue": { + "value": 10176.87890625 + }, + "cumulative_revenue": { + "value": 176098.9765625 + } + }, + { + "key_as_string": "2024-11-30T00:00:00.000Z", + "key": 1732924800000, + "doc_count": 157, + "revenue": { + "value": 11480.33203125 + }, + "cumulative_revenue": { + "value": 187579.30859375 + } + }, + { + "key_as_string": "2024-12-01T00:00:00.000Z", + "key": 1733011200000, + "doc_count": 158, + "revenue": { + "value": 11533.265625 + }, + "cumulative_revenue": { + "value": 199112.57421875 + } + }, + { + "key_as_string": "2024-12-02T00:00:00.000Z", + "key": 1733097600000, + "doc_count": 144, + "revenue": { + "value": 10499.8125 + }, + "cumulative_revenue": { + "value": 209612.38671875 + } + }, + { + "key_as_string": "2024-12-03T00:00:00.000Z", + "key": 1733184000000, + "doc_count": 151, + "revenue": { + "value": 12111.6875 + }, + "cumulative_revenue": { + "value": 221724.07421875 + } + }, + { + "key_as_string": "2024-12-04T00:00:00.000Z", + "key": 1733270400000, + "doc_count": 145, + "revenue": { + "value": 10530.765625 + }, + "cumulative_revenue": { + "value": 232254.83984375 + } + }, + { + "key_as_string": "2024-12-05T00:00:00.000Z", + "key": 1733356800000, + "doc_count": 157, + "revenue": { + "value": 11872.5625 + }, + "cumulative_revenue": { + "value": 244127.40234375 + } + }, + { + "key_as_string": "2024-12-06T00:00:00.000Z", + "key": 1733443200000, + "doc_count": 158, + "revenue": { + "value": 12109.453125 + }, + "cumulative_revenue": { + "value": 256236.85546875 + } + }, + { + "key_as_string": "2024-12-07T00:00:00.000Z", + "key": 1733529600000, + "doc_count": 153, + "revenue": { + "value": 11057.40625 + }, + "cumulative_revenue": { + "value": 267294.26171875 + } + }, + { + "key_as_string": "2024-12-08T00:00:00.000Z", + "key": 1733616000000, + "doc_count": 165, + "revenue": { + "value": 13095.609375 + }, + "cumulative_revenue": { + "value": 280389.87109375 + } + }, + { + "key_as_string": "2024-12-09T00:00:00.000Z", + "key": 1733702400000, + "doc_count": 153, + "revenue": { + "value": 12574.015625 + }, + "cumulative_revenue": { + "value": 292963.88671875 + } + }, + { + "key_as_string": "2024-12-10T00:00:00.000Z", + "key": 1733788800000, + "doc_count": 158, + "revenue": { + "value": 11188.1875 + }, + "cumulative_revenue": { + "value": 304152.07421875 + } + }, + { + "key_as_string": "2024-12-11T00:00:00.000Z", + "key": 1733875200000, + "doc_count": 160, + "revenue": { + "value": 12117.65625 + }, + "cumulative_revenue": { + "value": 316269.73046875 + } + }, + { + "key_as_string": "2024-12-12T00:00:00.000Z", + "key": 1733961600000, + "doc_count": 159, + "revenue": { + "value": 11558.25 + }, + "cumulative_revenue": { + "value": 327827.98046875 + } + }, + { + "key_as_string": "2024-12-13T00:00:00.000Z", + "key": 1734048000000, + "doc_count": 152, + "revenue": { + "value": 11921.1171875 + }, + "cumulative_revenue": { + "value": 339749.09765625 + } + }, + { + "key_as_string": "2024-12-14T00:00:00.000Z", + "key": 1734134400000, + "doc_count": 142, + "revenue": { + "value": 11135.03125 + }, + "cumulative_revenue": { + "value": 350884.12890625 + } + } + ] + } + } +} +---- +// TEST[skip:Using Kibana sample data] +<1> `daily_sales`: Results from our daily sales date histogram +<2> `buckets`: Array of time-based buckets +<3> `key_as_string`: Date for this bucket (in ISO format since no format specified) +<4> `revenue`: Daily revenue for this date +<5> `cumulative_revenue`: Running total of revenue up to this date +============== + +[discrete] +[[aggregations-tutorial-next-steps]] +=== Next steps + +Refer to the <> for more details on all available aggregation types. \ No newline at end of file diff --git a/docs/reference/quickstart/index.asciidoc b/docs/reference/quickstart/index.asciidoc index 31ba67e1b7a60..330582956c457 100644 --- a/docs/reference/quickstart/index.asciidoc +++ b/docs/reference/quickstart/index.asciidoc @@ -26,6 +26,7 @@ Alternatively, refer to our <>. Learn about indices, documents, and mappings, and perform a basic search using the Query DSL. * <>. Learn about different options for querying data, including full-text search and filtering, using the Query DSL. * <>: Learn how to query and aggregate your data using {esql}. +* <>. Learn how to analyze data using different types of aggregations, including metrics, buckets, and pipelines. * <>: Learn how to create embeddings for your data with `semantic_text` and query using the `semantic` query. ** <>: Learn how to combine semantic search with full-text search. * <>: Learn how to ingest dense vector embeddings into {es}. @@ -41,3 +42,4 @@ If you're interested in using {es} with Python, check out Elastic Search Labs: include::getting-started.asciidoc[] include::full-text-filtering-tutorial.asciidoc[] +include::aggs-tutorial.asciidoc[] diff --git a/docs/reference/release-notes/8.17.0.asciidoc b/docs/reference/release-notes/8.17.0.asciidoc index 813da8c4e30da..7adaf21272301 100644 --- a/docs/reference/release-notes/8.17.0.asciidoc +++ b/docs/reference/release-notes/8.17.0.asciidoc @@ -5,6 +5,19 @@ coming[8.17.0] Also see <>. +[[license-8.17.0]] +[float] +=== License changes + +[float] +==== Change to synthetic `_source` licensing + +Starting with this release, the <> feature is available exclusively with the Enterprise subscription. Synthetic `_source` is used in logs data streams (`logsdb` index mode), time series data streams (TSDS, using `time_series` index mode), application performance monitoring (APM), and Universal Profiling. + +If you are using these capabilities and are not on an Enterprise license, the change will result in increased storage requirements for new data, as the synthetic `_source` setting will be ignored. Existing indices that used synthetic `_source` will remain seamlessly accessible. + +Refer to the subscription page for https://www.elastic.co/subscriptions/cloud[Elastic Cloud] and {subscriptions}[Elastic Stack/self-managed] for the breakdown of available features and their associated subscription tiers. For further details and subscription options, contact your Elastic sales representative or https://www.elastic.co/contact[contact us]. + [[bug-8.17.0]] [float] === Bug fixes @@ -126,8 +139,8 @@ Indices APIs:: * Ensure class resource stream is closed in `ResourceUtils` {es-pull}116437[#116437] Inference:: -* [8.17] Add version prefix to Inference Service API path {es-pull}117366[#117366] -* [8.17] Update sparse text embeddings API route for Inference Service {es-pull}118368[#118368] +* Add version prefix to Inference Service API path {es-pull}117366[#117366] +* Update sparse text embeddings API route for Inference Service {es-pull}118368[#118368] Infra/Core:: * Support for unsigned 64 bit numbers in Cpu stats {es-pull}114681[#114681] (issue: {es-issue}112274[#112274]) diff --git a/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc b/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc index 5859ccd03e511..a68f20fb1c656 100644 --- a/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc +++ b/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc @@ -1,25 +1,26 @@ |==== -| 20+^h| Remote cluster version +| 21+^h| Remote cluster version h| Local cluster version - | 6.8 | 7.1–7.16 | 7.17 | 8.0 | 8.1 | 8.2 | 8.3 | 8.4 | 8.5 | 8.6 | 8.7 | 8.8 | 8.9 | 8.10 | 8.11 | 8.12 | 8.13 | 8.14 | 8.15 | 8.16 -| 6.8 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} -| 7.1–7.16 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} -| 7.17 | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.0 | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.1 | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.2 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.3 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.4 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.5 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.6 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.7 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.8 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.9 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.10 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.11 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.12 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.13 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.14 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.15 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.16 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} + | 6.8 | 7.1–7.16 | 7.17 | 8.0 | 8.1 | 8.2 | 8.3 | 8.4 | 8.5 | 8.6 | 8.7 | 8.8 | 8.9 | 8.10 | 8.11 | 8.12 | 8.13 | 8.14 | 8.15 | 8.16 | 8.17 +| 6.8 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} +| 7.1–7.16 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} +| 7.17 | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.0 | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.1 | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.2 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.3 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.4 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.5 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.6 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.7 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.8 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.9 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.10 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.11 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.12 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.13 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.14 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.15 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.16 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.17 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} |==== diff --git a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc index 0abc44c809d08..c2fcb88380f53 100644 --- a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc @@ -45,7 +45,7 @@ include::{es-ref-dir}/tab-widgets/inference-api/infer-api-task-widget.asciidoc[] ==== Create the index mapping The mapping of the destination index - the index that contains the embeddings that the model will create based on your input text - must be created. -The destination index must have a field with the <> field type for most models and the <> field type for the sparse vector models like in the case of the `elser` service to index the output of the used model. +The destination index must have a field with the <> field type for most models and the <> field type for the sparse vector models like in the case of the `elasticsearch` service to index the output of the used model. include::{es-ref-dir}/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc index eeecb4718658a..9e935f79aa0ac 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc @@ -8,7 +8,7 @@ the Cohere service. // tag::elser[] ELSER is a model trained by Elastic. If you have an {es} deployment, there is no -further requirement for using the {infer} API with the `elser` service. +further requirement for using the {infer} API with the `elasticsearch` service. // end::elser[] diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index d0a8dfae4f242..19ae649c12a83 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -171,6 +171,7 @@ public void parse(DocumentParserContext context) throws IOException { ); } + final boolean isWithinLeaf = context.path().isWithinLeafObject(); String feature = null; try { // make sure that we don't expand dots in field names while parsing @@ -205,7 +206,7 @@ public void parse(DocumentParserContext context) throws IOException { context.addToFieldNames(fieldType().name()); } } finally { - context.path().setWithinLeafObject(false); + context.path().setWithinLeafObject(isWithinLeaf); } } diff --git a/x-pack/plugin/security/qa/multi-cluster/build.gradle b/x-pack/plugin/security/qa/multi-cluster/build.gradle index 646ecd366639b..78ea22f0c6f1c 100644 --- a/x-pack/plugin/security/qa/multi-cluster/build.gradle +++ b/x-pack/plugin/security/qa/multi-cluster/build.gradle @@ -25,6 +25,7 @@ dependencies { clusterModules project(':x-pack:plugin:enrich') clusterModules project(':x-pack:plugin:autoscaling') clusterModules project(':x-pack:plugin:ml') + clusterModules project(xpackModule('ilm')) clusterModules(project(":modules:ingest-common")) } diff --git a/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityDataStreamEsqlRcs1IT.java b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityDataStreamEsqlRcs1IT.java new file mode 100644 index 0000000000000..57eb583912c49 --- /dev/null +++ b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityDataStreamEsqlRcs1IT.java @@ -0,0 +1,402 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.remotecluster; + +import org.elasticsearch.Build; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.RequestOptions; +import org.elasticsearch.client.Response; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.CheckedFunction; +import org.elasticsearch.core.Strings; +import org.elasticsearch.test.MapMatcher; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.cluster.util.resource.Resource; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.json.JsonXContent; +import org.junit.ClassRule; +import org.junit.rules.RuleChain; +import org.junit.rules.TestRule; + +import java.io.IOException; +import java.util.List; +import java.util.Locale; + +import static org.elasticsearch.test.ListMatcher.matchesList; +import static org.elasticsearch.test.MapMatcher.assertMap; +import static org.elasticsearch.test.MapMatcher.matchesMap; + +// TODO consolidate me with RemoteClusterSecurityDataStreamEsqlRcs2IT +public class RemoteClusterSecurityDataStreamEsqlRcs1IT extends AbstractRemoteClusterSecurityTestCase { + static { + fulfillingCluster = ElasticsearchCluster.local() + .name("fulfilling-cluster") + .module("x-pack-autoscaling") + .module("x-pack-esql") + .module("x-pack-enrich") + .module("x-pack-ml") + .module("x-pack-ilm") + .module("ingest-common") + .apply(commonClusterConfig) + .setting("xpack.ml.enabled", "false") + .setting("xpack.security.authc.token.enabled", "true") + .rolesFile(Resource.fromClasspath("roles.yml")) + .build(); + + queryCluster = ElasticsearchCluster.local() + .name("query-cluster") + .module("x-pack-autoscaling") + .module("x-pack-esql") + .module("x-pack-enrich") + .module("x-pack-ml") + .module("x-pack-ilm") + .module("ingest-common") + .apply(commonClusterConfig) + .setting("xpack.ml.enabled", "false") + .setting("xpack.security.authc.token.enabled", "true") + .rolesFile(Resource.fromClasspath("roles.yml")) + .user("logs_foo_all", "x-pack-test-password", "logs_foo_all", false) + .user("logs_foo_16_only", "x-pack-test-password", "logs_foo_16_only", false) + .user("logs_foo_after_2021", "x-pack-test-password", "logs_foo_after_2021", false) + .user("logs_foo_after_2021_pattern", "x-pack-test-password", "logs_foo_after_2021_pattern", false) + .user("logs_foo_after_2021_alias", "x-pack-test-password", "logs_foo_after_2021_alias", false) + .build(); + } + + @ClassRule + public static TestRule clusterRule = RuleChain.outerRule(fulfillingCluster).around(queryCluster); + + public void testDataStreamsWithDlsAndFls() throws Exception { + configureRemoteCluster(REMOTE_CLUSTER_ALIAS, fulfillingCluster, true, randomBoolean(), randomBoolean()); + createDataStreamOnFulfillingCluster(); + setupAdditionalUsersAndRoles(); + + doTestDataStreamsWithFlsAndDls(); + } + + private void setupAdditionalUsersAndRoles() throws IOException { + createUserAndRoleOnQueryCluster("fls_user_logs_pattern", "fls_user_logs_pattern", """ + { + "indices": [ + { + "names": ["logs-*"], + "privileges": ["read"], + "field_security": { + "grant": ["@timestamp", "data_stream.namespace"] + } + } + ] + }"""); + createUserAndRoleOnFulfillingCluster("fls_user_logs_pattern", "fls_user_logs_pattern", """ + { + "indices": [ + { + "names": ["logs-*"], + "privileges": ["read"], + "field_security": { + "grant": ["@timestamp", "data_stream.namespace"] + } + } + ] + }"""); + } + + static void createUserAndRoleOnQueryCluster(String username, String roleName, String roleJson) throws IOException { + final var putRoleRequest = new Request("PUT", "/_security/role/" + roleName); + putRoleRequest.setJsonEntity(roleJson); + assertOK(adminClient().performRequest(putRoleRequest)); + + final var putUserRequest = new Request("PUT", "/_security/user/" + username); + putUserRequest.setJsonEntity(Strings.format(""" + { + "password": "%s", + "roles" : ["%s"] + }""", PASS, roleName)); + assertOK(adminClient().performRequest(putUserRequest)); + } + + static void createUserAndRoleOnFulfillingCluster(String username, String roleName, String roleJson) throws IOException { + final var putRoleRequest = new Request("PUT", "/_security/role/" + roleName); + putRoleRequest.setJsonEntity(roleJson); + assertOK(performRequestAgainstFulfillingCluster(putRoleRequest)); + + final var putUserRequest = new Request("PUT", "/_security/user/" + username); + putUserRequest.setJsonEntity(Strings.format(""" + { + "password": "%s", + "roles" : ["%s"] + }""", PASS, roleName)); + assertOK(performRequestAgainstFulfillingCluster(putUserRequest)); + } + + static Response runESQLCommandAgainstQueryCluster(String user, String command) throws IOException { + if (command.toLowerCase(Locale.ROOT).contains("limit") == false) { + // add a (high) limit to avoid warnings on default limit + command += " | limit 10000000"; + } + XContentBuilder json = JsonXContent.contentBuilder(); + json.startObject(); + json.field("query", command); + addRandomPragmas(json); + json.endObject(); + Request request = new Request("POST", "_query"); + request.setJsonEntity(org.elasticsearch.common.Strings.toString(json)); + request.setOptions(RequestOptions.DEFAULT.toBuilder().addHeader("es-security-runas-user", user)); + request.addParameter("error_trace", "true"); + Response response = adminClient().performRequest(request); + assertOK(response); + return response; + } + + static void addRandomPragmas(XContentBuilder builder) throws IOException { + if (Build.current().isSnapshot()) { + Settings pragmas = randomPragmas(); + if (pragmas != Settings.EMPTY) { + builder.startObject("pragma"); + builder.value(pragmas); + builder.endObject(); + } + } + } + + static Settings randomPragmas() { + Settings.Builder settings = Settings.builder(); + if (randomBoolean()) { + settings.put("page_size", between(1, 5)); + } + if (randomBoolean()) { + settings.put("exchange_buffer_size", between(1, 2)); + } + if (randomBoolean()) { + settings.put("data_partitioning", randomFrom("shard", "segment", "doc")); + } + if (randomBoolean()) { + settings.put("enrich_max_workers", between(1, 5)); + } + if (randomBoolean()) { + settings.put("node_level_reduction", randomBoolean()); + } + return settings.build(); + } + + static void createDataStreamOnFulfillingCluster() throws Exception { + createDataStreamPolicy(AbstractRemoteClusterSecurityTestCase::performRequestAgainstFulfillingCluster); + createDataStreamComponentTemplate(AbstractRemoteClusterSecurityTestCase::performRequestAgainstFulfillingCluster); + createDataStreamIndexTemplate(AbstractRemoteClusterSecurityTestCase::performRequestAgainstFulfillingCluster); + createDataStreamDocuments(AbstractRemoteClusterSecurityTestCase::performRequestAgainstFulfillingCluster); + createDataStreamAlias(AbstractRemoteClusterSecurityTestCase::performRequestAgainstFulfillingCluster); + } + + private static void createDataStreamPolicy(CheckedFunction requestConsumer) throws Exception { + Request request = new Request("PUT", "_ilm/policy/my-lifecycle-policy"); + request.setJsonEntity(""" + { + "policy": { + "phases": { + "hot": { + "actions": { + "rollover": { + "max_primary_shard_size": "50gb" + } + } + }, + "delete": { + "min_age": "735d", + "actions": { + "delete": {} + } + } + } + } + }"""); + + requestConsumer.apply(request); + } + + private static void createDataStreamComponentTemplate(CheckedFunction requestConsumer) throws Exception { + Request request = new Request("PUT", "_component_template/my-template"); + request.setJsonEntity(""" + { + "template": { + "settings": { + "index.lifecycle.name": "my-lifecycle-policy" + }, + "mappings": { + "properties": { + "@timestamp": { + "type": "date", + "format": "date_optional_time||epoch_millis" + }, + "data_stream": { + "properties": { + "namespace": {"type": "keyword"}, + "environment": {"type": "keyword"} + } + } + } + } + } + }"""); + requestConsumer.apply(request); + } + + private static void createDataStreamIndexTemplate(CheckedFunction requestConsumer) throws Exception { + Request request = new Request("PUT", "_index_template/my-index-template"); + request.setJsonEntity(""" + { + "index_patterns": ["logs-*"], + "data_stream": {}, + "composed_of": ["my-template"], + "priority": 500 + }"""); + requestConsumer.apply(request); + } + + private static void createDataStreamDocuments(CheckedFunction requestConsumer) throws Exception { + Request request = new Request("POST", "logs-foo/_bulk"); + request.addParameter("refresh", ""); + request.setJsonEntity(""" + { "create" : {} } + { "@timestamp": "2099-05-06T16:21:15.000Z", "data_stream": {"namespace": "16", "environment": "dev"} } + { "create" : {} } + { "@timestamp": "2001-05-06T16:21:15.000Z", "data_stream": {"namespace": "17", "environment": "prod"} } + """); + assertMap(entityAsMap(requestConsumer.apply(request)), matchesMap().extraOk().entry("errors", false)); + } + + private static void createDataStreamAlias(CheckedFunction requestConsumer) throws Exception { + Request request = new Request("PUT", "_alias"); + request.setJsonEntity(""" + { + "actions": [ + { + "add": { + "index": "logs-foo", + "alias": "alias-foo" + } + } + ] + }"""); + assertMap(entityAsMap(requestConsumer.apply(request)), matchesMap().extraOk().entry("errors", false)); + } + + static void doTestDataStreamsWithFlsAndDls() throws IOException { + // DLS + MapMatcher twoResults = matchesMap().extraOk().entry("values", matchesList().item(matchesList().item(2))); + MapMatcher oneResult = matchesMap().extraOk().entry("values", matchesList().item(matchesList().item(1))); + assertMap( + entityAsMap(runESQLCommandAgainstQueryCluster("logs_foo_all", "FROM my_remote_cluster:logs-foo | STATS COUNT(*)")), + twoResults + ); + assertMap( + entityAsMap(runESQLCommandAgainstQueryCluster("logs_foo_16_only", "FROM my_remote_cluster:logs-foo | STATS COUNT(*)")), + oneResult + ); + assertMap( + entityAsMap(runESQLCommandAgainstQueryCluster("logs_foo_after_2021", "FROM my_remote_cluster:logs-foo | STATS COUNT(*)")), + oneResult + ); + assertMap( + entityAsMap( + runESQLCommandAgainstQueryCluster("logs_foo_after_2021_pattern", "FROM my_remote_cluster:logs-foo | STATS COUNT(*)") + ), + oneResult + ); + assertMap( + entityAsMap(runESQLCommandAgainstQueryCluster("logs_foo_all", "FROM my_remote_cluster:logs-* | STATS COUNT(*)")), + twoResults + ); + assertMap( + entityAsMap(runESQLCommandAgainstQueryCluster("logs_foo_16_only", "FROM my_remote_cluster:logs-* | STATS COUNT(*)")), + oneResult + ); + assertMap( + entityAsMap(runESQLCommandAgainstQueryCluster("logs_foo_after_2021", "FROM my_remote_cluster:logs-* | STATS COUNT(*)")), + oneResult + ); + assertMap( + entityAsMap(runESQLCommandAgainstQueryCluster("logs_foo_after_2021_pattern", "FROM my_remote_cluster:logs-* | STATS COUNT(*)")), + oneResult + ); + + assertMap( + entityAsMap( + runESQLCommandAgainstQueryCluster("logs_foo_after_2021_alias", "FROM my_remote_cluster:alias-foo | STATS COUNT(*)") + ), + oneResult + ); + assertMap( + entityAsMap(runESQLCommandAgainstQueryCluster("logs_foo_after_2021_alias", "FROM my_remote_cluster:alias-* | STATS COUNT(*)")), + oneResult + ); + + // FLS + // logs_foo_all does not have FLS restrictions so should be able to access all fields + assertMap( + entityAsMap( + runESQLCommandAgainstQueryCluster("logs_foo_all", "FROM my_remote_cluster:logs-foo | SORT data_stream.namespace | LIMIT 1") + ), + matchesMap().extraOk() + .entry( + "columns", + List.of( + matchesMap().entry("name", "@timestamp").entry("type", "date"), + matchesMap().entry("name", "data_stream.environment").entry("type", "keyword"), + matchesMap().entry("name", "data_stream.namespace").entry("type", "keyword") + ) + ) + ); + assertMap( + entityAsMap( + runESQLCommandAgainstQueryCluster("logs_foo_all", "FROM my_remote_cluster:logs-* | SORT data_stream.namespace | LIMIT 1") + ), + matchesMap().extraOk() + .entry( + "columns", + List.of( + matchesMap().entry("name", "@timestamp").entry("type", "date"), + matchesMap().entry("name", "data_stream.environment").entry("type", "keyword"), + matchesMap().entry("name", "data_stream.namespace").entry("type", "keyword") + ) + ) + ); + + assertMap( + entityAsMap( + runESQLCommandAgainstQueryCluster( + "fls_user_logs_pattern", + "FROM my_remote_cluster:logs-foo | SORT data_stream.namespace | LIMIT 1" + ) + ), + matchesMap().extraOk() + .entry( + "columns", + List.of( + matchesMap().entry("name", "@timestamp").entry("type", "date"), + matchesMap().entry("name", "data_stream.namespace").entry("type", "keyword") + ) + ) + ); + assertMap( + entityAsMap( + runESQLCommandAgainstQueryCluster( + "fls_user_logs_pattern", + "FROM my_remote_cluster:logs-* | SORT data_stream.namespace | LIMIT 1" + ) + ), + matchesMap().extraOk() + .entry( + "columns", + List.of( + matchesMap().entry("name", "@timestamp").entry("type", "date"), + matchesMap().entry("name", "data_stream.namespace").entry("type", "keyword") + ) + ) + ); + } +} diff --git a/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityDataStreamEsqlRcs2IT.java b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityDataStreamEsqlRcs2IT.java new file mode 100644 index 0000000000000..c5cf704177020 --- /dev/null +++ b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityDataStreamEsqlRcs2IT.java @@ -0,0 +1,126 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.remotecluster; + +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.cluster.util.resource.Resource; +import org.elasticsearch.test.junit.RunnableTestRuleAdapter; +import org.junit.ClassRule; +import org.junit.rules.RuleChain; +import org.junit.rules.TestRule; + +import java.io.IOException; +import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; + +import static org.elasticsearch.xpack.remotecluster.RemoteClusterSecurityDataStreamEsqlRcs1IT.createDataStreamOnFulfillingCluster; +import static org.elasticsearch.xpack.remotecluster.RemoteClusterSecurityDataStreamEsqlRcs1IT.createUserAndRoleOnQueryCluster; +import static org.elasticsearch.xpack.remotecluster.RemoteClusterSecurityDataStreamEsqlRcs1IT.doTestDataStreamsWithFlsAndDls; + +// TODO consolidate me with RemoteClusterSecurityDataStreamEsqlRcs1IT +public class RemoteClusterSecurityDataStreamEsqlRcs2IT extends AbstractRemoteClusterSecurityTestCase { + private static final AtomicReference> API_KEY_MAP_REF = new AtomicReference<>(); + private static final AtomicBoolean SSL_ENABLED_REF = new AtomicBoolean(); + private static final AtomicBoolean NODE1_RCS_SERVER_ENABLED = new AtomicBoolean(); + private static final AtomicBoolean NODE2_RCS_SERVER_ENABLED = new AtomicBoolean(); + + static { + fulfillingCluster = ElasticsearchCluster.local() + .name("fulfilling-cluster") + .nodes(3) + .module("x-pack-autoscaling") + .module("x-pack-esql") + .module("x-pack-enrich") + .module("x-pack-ml") + .module("x-pack-ilm") + .module("ingest-common") + .apply(commonClusterConfig) + .setting("remote_cluster.port", "0") + .setting("xpack.ml.enabled", "false") + .setting("xpack.security.remote_cluster_server.ssl.enabled", () -> String.valueOf(SSL_ENABLED_REF.get())) + .setting("xpack.security.remote_cluster_server.ssl.key", "remote-cluster.key") + .setting("xpack.security.remote_cluster_server.ssl.certificate", "remote-cluster.crt") + .setting("xpack.security.authc.token.enabled", "true") + .keystore("xpack.security.remote_cluster_server.ssl.secure_key_passphrase", "remote-cluster-password") + .node(0, spec -> spec.setting("remote_cluster_server.enabled", "true")) + .node(1, spec -> spec.setting("remote_cluster_server.enabled", () -> String.valueOf(NODE1_RCS_SERVER_ENABLED.get()))) + .node(2, spec -> spec.setting("remote_cluster_server.enabled", () -> String.valueOf(NODE2_RCS_SERVER_ENABLED.get()))) + .build(); + + queryCluster = ElasticsearchCluster.local() + .name("query-cluster") + .module("x-pack-autoscaling") + .module("x-pack-esql") + .module("x-pack-enrich") + .module("x-pack-ml") + .module("x-pack-ilm") + .module("ingest-common") + .apply(commonClusterConfig) + .setting("xpack.ml.enabled", "false") + .setting("xpack.security.remote_cluster_client.ssl.enabled", () -> String.valueOf(SSL_ENABLED_REF.get())) + .setting("xpack.security.remote_cluster_client.ssl.certificate_authorities", "remote-cluster-ca.crt") + .setting("xpack.security.authc.token.enabled", "true") + .keystore("cluster.remote.my_remote_cluster.credentials", () -> { + if (API_KEY_MAP_REF.get() == null) { + final Map apiKeyMap = createCrossClusterAccessApiKey(""" + { + "search": [ + { + "names": ["logs-*", "alias-*"] + } + ] + }"""); + API_KEY_MAP_REF.set(apiKeyMap); + } + return (String) API_KEY_MAP_REF.get().get("encoded"); + }) + .rolesFile(Resource.fromClasspath("roles.yml")) + .user("logs_foo_all", "x-pack-test-password", "logs_foo_all", false) + .user("logs_foo_16_only", "x-pack-test-password", "logs_foo_16_only", false) + .user("logs_foo_after_2021", "x-pack-test-password", "logs_foo_after_2021", false) + .user("logs_foo_after_2021_pattern", "x-pack-test-password", "logs_foo_after_2021_pattern", false) + .user("logs_foo_after_2021_alias", "x-pack-test-password", "logs_foo_after_2021_alias", false) + .build(); + } + + @ClassRule + // Use a RuleChain to ensure that fulfilling cluster is started before query cluster + // `SSL_ENABLED_REF` is used to control the SSL-enabled setting on the test clusters + // We set it here, since randomization methods are not available in the static initialize context above + public static TestRule clusterRule = RuleChain.outerRule(new RunnableTestRuleAdapter(() -> { + SSL_ENABLED_REF.set(usually()); + NODE1_RCS_SERVER_ENABLED.set(randomBoolean()); + NODE2_RCS_SERVER_ENABLED.set(randomBoolean()); + })).around(fulfillingCluster).around(queryCluster); + + public void testDataStreamsWithDlsAndFls() throws Exception { + configureRemoteCluster(); + createDataStreamOnFulfillingCluster(); + setupAdditionalUsersAndRoles(); + + doTestDataStreamsWithFlsAndDls(); + } + + private void setupAdditionalUsersAndRoles() throws IOException { + createUserAndRoleOnQueryCluster("fls_user_logs_pattern", "fls_user_logs_pattern", """ + { + "indices": [{"names": [""], "privileges": ["read"]}], + "remote_indices": [ + { + "names": ["logs-*"], + "privileges": ["read"], + "field_security": { + "grant": ["@timestamp", "data_stream.namespace"] + }, + "clusters": ["*"] + } + ] + }"""); + } +} diff --git a/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/resources/roles.yml b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/resources/roles.yml index b61daa068ed1a..c09f9dc620a4c 100644 --- a/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/resources/roles.yml +++ b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/resources/roles.yml @@ -41,3 +41,102 @@ ccr_user_role: manage_role: cluster: [ 'manage' ] + +logs_foo_all: + cluster: [] + indices: + - names: [ 'logs-foo' ] + privileges: [ 'read' ] + remote_indices: + - names: [ 'logs-foo' ] + clusters: [ '*' ] + privileges: [ 'read' ] + +logs_foo_16_only: + cluster: [] + indices: + - names: [ 'logs-foo' ] + privileges: [ 'read' ] + query: | + { + "term": { + "data_stream.namespace": "16" + } + } + remote_indices: + - names: [ 'logs-foo' ] + clusters: [ '*' ] + privileges: [ 'read' ] + query: | + { + "term": { + "data_stream.namespace": "16" + } + } + +logs_foo_after_2021: + cluster: [] + indices: + - names: [ 'logs-foo' ] + privileges: [ 'read' ] + query: | + { + "range": { + "@timestamp": {"gte": "2021-01-01T00:00:00"} + } + } + remote_indices: + - names: [ 'logs-foo' ] + clusters: [ '*' ] + privileges: [ 'read' ] + query: | + { + "range": { + "@timestamp": {"gte": "2021-01-01T00:00:00"} + } + } + +logs_foo_after_2021_pattern: + cluster: [] + indices: + - names: [ 'logs-*' ] + privileges: [ 'read' ] + query: | + { + "range": { + "@timestamp": {"gte": "2021-01-01T00:00:00"} + } + } + remote_indices: + - names: [ 'logs-*' ] + clusters: [ '*' ] + privileges: [ 'read' ] + query: | + { + "range": { + "@timestamp": {"gte": "2021-01-01T00:00:00"} + } + } + +logs_foo_after_2021_alias: + cluster: [] + indices: + - names: [ 'alias-foo' ] + privileges: [ 'read' ] + query: | + { + "range": { + "@timestamp": {"gte": "2021-01-01T00:00:00"} + } + } + remote_indices: + - names: [ 'alias-foo' ] + clusters: [ '*' ] + privileges: [ 'read' ] + query: | + { + "range": { + "@timestamp": {"gte": "2021-01-01T00:00:00"} + } + } +