diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index f0d9068820029..5b98444c044d2 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -70,7 +70,3 @@ server/src/main/java/org/elasticsearch/threadpool @elastic/es-core-infra # Security x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/privilege @elastic/es-security x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java @elastic/es-security - -# Analytical engine -x-pack/plugin/esql @elastic/es-analytical-engine -x-pack/plugin/esql-core @elastic/es-analytical-engine diff --git a/docs/changelog/112972.yaml b/docs/changelog/112972.yaml new file mode 100644 index 0000000000000..5332ac13fd13f --- /dev/null +++ b/docs/changelog/112972.yaml @@ -0,0 +1,6 @@ +pr: 112972 +summary: "ILM: Add `total_shards_per_node` setting to searchable snapshot" +area: ILM+SLM +type: enhancement +issues: + - 112261 diff --git a/docs/changelog/113183.yaml b/docs/changelog/113183.yaml new file mode 100644 index 0000000000000..f30ce9831adb3 --- /dev/null +++ b/docs/changelog/113183.yaml @@ -0,0 +1,6 @@ +pr: 113183 +summary: "ESQL: TOP support for strings" +area: ES|QL +type: feature +issues: + - 109849 diff --git a/docs/internal/DistributedArchitectureGuide.md b/docs/internal/DistributedArchitectureGuide.md index 5cbc1bf33ab7a..793d38e3d73b3 100644 --- a/docs/internal/DistributedArchitectureGuide.md +++ b/docs/internal/DistributedArchitectureGuide.md @@ -252,7 +252,7 @@ changes. The cloud service will add more resources to the cluster based on Elast Elasticsearch by itself cannot automatically scale. Autoscaling recommendations are tailored for the user [based on user defined policies][], composed of data -roles (hot, frozen, etc) and [deciders][]. There's a public [webinar on autoscaling][], as well as the +roles (hot, frozen, etc.) and [deciders][]. There's a public [webinar on autoscaling][], as well as the public [Autoscaling APIs] docs. Autoscaling's current implementation is based primary on storage requirements, as well as memory capacity @@ -332,7 +332,7 @@ problems in the cluster. It uses [an algorithm defined here][]. Some examples ar [an algorithm defined here]: https://github.com/elastic/elasticsearch/blob/v8.13.2/x-pack/plugin/autoscaling/src/main/java/org/elasticsearch/xpack/autoscaling/storage/ReactiveStorageDeciderService.java#L158-L176 The `ProactiveStorageDeciderService` maintains a forecast window that [defaults to 30 minutes][]. It only -runs on data streams (ILM, rollover, etc), not regular indexes. It looks at past [index changes][] that +runs on data streams (ILM, rollover, etc.), not regular indexes. It looks at past [index changes][] that took place within the forecast window to [predict][] resources that will be needed shortly. [defaults to 30 minutes]: https://github.com/elastic/elasticsearch/blob/v8.13.2/x-pack/plugin/autoscaling/src/main/java/org/elasticsearch/xpack/autoscaling/storage/ProactiveStorageDeciderService.java#L32 diff --git a/docs/plugins/analysis-icu.asciidoc b/docs/plugins/analysis-icu.asciidoc index f6ca6ceae7ea4..da7efd2843f50 100644 --- a/docs/plugins/analysis-icu.asciidoc +++ b/docs/plugins/analysis-icu.asciidoc @@ -380,7 +380,7 @@ GET /my-index-000001/_search <3> -------------------------- -<1> The `name` field uses the `standard` analyzer, and so support full text queries. +<1> The `name` field uses the `standard` analyzer, and so supports full text queries. <2> The `name.sort` field is an `icu_collation_keyword` field that will preserve the name as a single token doc values, and applies the German ``phonebook'' order. <3> An example query which searches the `name` field and sorts on the `name.sort` field. @@ -467,7 +467,7 @@ differences. `case_first`:: Possible values: `lower` or `upper`. Useful to control which case is sorted -first when case is not ignored for strength `tertiary`. The default depends on +first when the case is not ignored for strength `tertiary`. The default depends on the collation. `numeric`:: diff --git a/docs/plugins/analysis-kuromoji.asciidoc b/docs/plugins/analysis-kuromoji.asciidoc index e8380ce0aca17..0a167bf3f0240 100644 --- a/docs/plugins/analysis-kuromoji.asciidoc +++ b/docs/plugins/analysis-kuromoji.asciidoc @@ -86,7 +86,7 @@ The `kuromoji_iteration_mark` normalizes Japanese horizontal iteration marks `normalize_kanji`:: - Indicates whether kanji iteration marks should be normalize. Defaults to `true`. + Indicates whether kanji iteration marks should be normalized. Defaults to `true`. `normalize_kana`:: @@ -194,7 +194,7 @@ PUT kuromoji_sample + -- Additional expert user parameters `nbest_cost` and `nbest_examples` can be used -to include additional tokens that most likely according to the statistical model. +to include additional tokens that are most likely according to the statistical model. If both parameters are used, the largest number of both is applied. `nbest_cost`:: diff --git a/docs/plugins/analysis-nori.asciidoc b/docs/plugins/analysis-nori.asciidoc index e7855f94758e1..02980a4ed8a8c 100644 --- a/docs/plugins/analysis-nori.asciidoc +++ b/docs/plugins/analysis-nori.asciidoc @@ -452,7 +452,7 @@ Which responds with: The `nori_number` token filter normalizes Korean numbers to regular Arabic decimal numbers in half-width characters. -Korean numbers are often written using a combination of Hangul and Arabic numbers with various kinds punctuation. +Korean numbers are often written using a combination of Hangul and Arabic numbers with various kinds of punctuation. For example, 3.2천 means 3200. This filter does this kind of normalization and allows a search for 3200 to match 3.2천 in text, but can also be used to make range facets based on the normalized numbers and so on. diff --git a/docs/plugins/development/creating-classic-plugins.asciidoc b/docs/plugins/development/creating-classic-plugins.asciidoc index cc03ad51275fa..58dc00e496c2d 100644 --- a/docs/plugins/development/creating-classic-plugins.asciidoc +++ b/docs/plugins/development/creating-classic-plugins.asciidoc @@ -18,7 +18,7 @@ will refuse to start in the presence of plugins with the incorrect [discrete] ==== Classic plugin file structure -Classis plugins are ZIP files composed of JAR files and +Classic plugins are ZIP files composed of JAR files and <>, a Java properties file that describes the plugin. diff --git a/docs/plugins/development/creating-stable-plugins.asciidoc b/docs/plugins/development/creating-stable-plugins.asciidoc index c9a8a1f6c7e2a..9f98774b5a761 100644 --- a/docs/plugins/development/creating-stable-plugins.asciidoc +++ b/docs/plugins/development/creating-stable-plugins.asciidoc @@ -1,8 +1,8 @@ [[creating-stable-plugins]] === Creating text analysis plugins with the stable plugin API -Text analysis plugins provide {es} with custom {ref}/analysis.html[Lucene -analyzers, token filters, character filters, and tokenizers]. +Text analysis plugins provide {es} with custom {ref}/analysis.html[Lucene +analyzers, token filters, character filters, and tokenizers]. [discrete] ==== The stable plugin API @@ -10,7 +10,7 @@ analyzers, token filters, character filters, and tokenizers]. Text analysis plugins can be developed against the stable plugin API. This API consists of the following dependencies: -* `plugin-api` - an API used by plugin developers to implement custom {es} +* `plugin-api` - an API used by plugin developers to implement custom {es} plugins. * `plugin-analysis-api` - an API used by plugin developers to implement analysis plugins and integrate them into {es}. @@ -18,7 +18,7 @@ plugins and integrate them into {es}. core Lucene analysis interfaces like `Tokenizer`, `Analyzer`, and `TokenStream`. For new versions of {es} within the same major version, plugins built against -this API do not need to be recompiled. Future versions of the API will be +this API does not need to be recompiled. Future versions of the API will be backwards compatible and plugins are binary compatible with future versions of {es}. In other words, once you have a working artifact, you can re-use it when you upgrade {es} to a new bugfix or minor version. @@ -48,9 +48,9 @@ require code changes. Stable plugins are ZIP files composed of JAR files and two metadata files: -* `stable-plugin-descriptor.properties` - a Java properties file that describes +* `stable-plugin-descriptor.properties` - a Java properties file that describes the plugin. Refer to <>. -* `named_components.json` - a JSON file mapping interfaces to key-value pairs +* `named_components.json` - a JSON file mapping interfaces to key-value pairs of component names and implementation classes. Note that only JAR files at the root of the plugin are added to the classpath @@ -65,7 +65,7 @@ you use this plugin. However, you don't need Gradle to create plugins. The {es} Github repository contains {es-repo}tree/main/plugins/examples/stable-analysis[an example analysis plugin]. -The example `build.gradle` build script provides a good starting point for +The example `build.gradle` build script provides a good starting point for developing your own plugin. [discrete] @@ -77,29 +77,29 @@ Plugins are written in Java, so you need to install a Java Development Kit [discrete] ===== Step by step -. Create a directory for your project. +. Create a directory for your project. . Copy the example `build.gradle` build script to your project directory. Note that this build script uses the `elasticsearch.stable-esplugin` gradle plugin to build your plugin. . Edit the `build.gradle` build script: -** Add a definition for the `pluginApiVersion` and matching `luceneVersion` -variables to the top of the file. You can find these versions in the -`build-tools-internal/version.properties` file in the {es-repo}[Elasticsearch +** Add a definition for the `pluginApiVersion` and matching `luceneVersion` +variables to the top of the file. You can find these versions in the +`build-tools-internal/version.properties` file in the {es-repo}[Elasticsearch Github repository]. -** Edit the `name` and `description` in the `esplugin` section of the build -script. This will create the plugin descriptor file. If you're not using the -`elasticsearch.stable-esplugin` gradle plugin, refer to +** Edit the `name` and `description` in the `esplugin` section of the build +script. This will create the plugin descriptor file. If you're not using the +`elasticsearch.stable-esplugin` gradle plugin, refer to <> to create the file manually. ** Add module information. -** Ensure you have declared the following compile-time dependencies. These -dependencies are compile-time only because {es} will provide these libraries at +** Ensure you have declared the following compile-time dependencies. These +dependencies are compile-time only because {es} will provide these libraries at runtime. *** `org.elasticsearch.plugin:elasticsearch-plugin-api` *** `org.elasticsearch.plugin:elasticsearch-plugin-analysis-api` *** `org.apache.lucene:lucene-analysis-common` -** For unit testing, ensure these dependencies have also been added to the +** For unit testing, ensure these dependencies have also been added to the `build.gradle` script as `testImplementation` dependencies. -. Implement an interface from the analysis plugin API, annotating it with +. Implement an interface from the analysis plugin API, annotating it with `NamedComponent`. Refer to <> for an example. . You should now be able to assemble a plugin ZIP file by running: + @@ -107,22 +107,22 @@ runtime. ---- gradle bundlePlugin ---- -The resulting plugin ZIP file is written to the `build/distributions` +The resulting plugin ZIP file is written to the `build/distributions` directory. [discrete] ===== YAML REST tests -The Gradle `elasticsearch.yaml-rest-test` plugin enables testing of your -plugin using the {es-repo}blob/main/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc[{es} yamlRestTest framework]. +The Gradle `elasticsearch.yaml-rest-test` plugin enables testing of your +plugin using the {es-repo}blob/main/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc[{es} yamlRestTest framework]. These tests use a YAML-formatted domain language to issue REST requests against -an internal {es} cluster that has your plugin installed, and to check the -results of those requests. The structure of a YAML REST test directory is as +an internal {es} cluster that has your plugin installed, and to check the +results of those requests. The structure of a YAML REST test directory is as follows: -* A test suite class, defined under `src/yamlRestTest/java`. This class should +* A test suite class, defined under `src/yamlRestTest/java`. This class should extend `ESClientYamlSuiteTestCase`. -* The YAML tests themselves should be defined under +* The YAML tests themselves should be defined under `src/yamlRestTest/resources/test/`. [[plugin-descriptor-file-stable]] diff --git a/docs/plugins/discovery-azure-classic.asciidoc b/docs/plugins/discovery-azure-classic.asciidoc index aa710a2fe7ef9..b8d37f024172c 100644 --- a/docs/plugins/discovery-azure-classic.asciidoc +++ b/docs/plugins/discovery-azure-classic.asciidoc @@ -148,7 +148,7 @@ Before starting, you need to have: -- You should follow http://azure.microsoft.com/en-us/documentation/articles/linux-use-ssh-key/[this guide] to learn -how to create or use existing SSH keys. If you have already did it, you can skip the following. +how to create or use existing SSH keys. If you have already done it, you can skip the following. Here is a description on how to generate SSH keys using `openssl`: diff --git a/docs/plugins/discovery-gce.asciidoc b/docs/plugins/discovery-gce.asciidoc index 2e8cff21208e0..0a2629b7f094b 100644 --- a/docs/plugins/discovery-gce.asciidoc +++ b/docs/plugins/discovery-gce.asciidoc @@ -478,7 +478,7 @@ discovery: seed_providers: gce -------------------------------------------------- -Replaces `project_id` and `zone` with your settings. +Replace `project_id` and `zone` with your settings. To run test: diff --git a/docs/plugins/integrations.asciidoc b/docs/plugins/integrations.asciidoc index 71f237692ad35..aff4aed0becd2 100644 --- a/docs/plugins/integrations.asciidoc +++ b/docs/plugins/integrations.asciidoc @@ -91,7 +91,7 @@ Integrations are not plugins, but are external tools or modules that make it eas Elasticsearch Grails plugin. * https://hibernate.org/search/[Hibernate Search] - Integration with Hibernate ORM, from the Hibernate team. Automatic synchronization of write operations, yet exposes full Elasticsearch capabilities for queries. Can return either Elasticsearch native or re-map queries back into managed entities loaded within transaction from the reference database. + Integration with Hibernate ORM, from the Hibernate team. Automatic synchronization of write operations, yet exposes full Elasticsearch capabilities for queries. Can return either Elasticsearch native or re-map queries back into managed entities loaded within transactions from the reference database. * https://github.com/spring-projects/spring-data-elasticsearch[Spring Data Elasticsearch]: Spring Data implementation for Elasticsearch @@ -104,7 +104,7 @@ Integrations are not plugins, but are external tools or modules that make it eas * https://pulsar.apache.org/docs/en/io-elasticsearch[Apache Pulsar]: The Elasticsearch Sink Connector is used to pull messages from Pulsar topics - and persist the messages to a index. + and persist the messages to an index. * https://micronaut-projects.github.io/micronaut-elasticsearch/latest/guide/index.html[Micronaut Elasticsearch Integration]: Integration of Micronaut with Elasticsearch diff --git a/docs/plugins/mapper-annotated-text.asciidoc b/docs/plugins/mapper-annotated-text.asciidoc index afe8ba41da9b8..e4141e98a2285 100644 --- a/docs/plugins/mapper-annotated-text.asciidoc +++ b/docs/plugins/mapper-annotated-text.asciidoc @@ -143,7 +143,7 @@ broader positional queries e.g. finding mentions of a `Guitarist` near to `strat WARNING: Any use of `=` signs in annotation values eg `[Prince](person=Prince)` will cause the document to be rejected with a parse failure. In future we hope to have a use for -the equals signs so wil actively reject documents that contain this today. +the equals signs so will actively reject documents that contain this today. [[annotated-text-synthetic-source]] ===== Synthetic `_source` diff --git a/docs/plugins/store-smb.asciidoc b/docs/plugins/store-smb.asciidoc index 8557ef868010f..da803b4f42022 100644 --- a/docs/plugins/store-smb.asciidoc +++ b/docs/plugins/store-smb.asciidoc @@ -10,7 +10,7 @@ include::install_remove.asciidoc[] ==== Working around a bug in Windows SMB and Java on windows When using a shared file system based on the SMB protocol (like Azure File Service) to store indices, the way Lucene -open index segment files is with a write only flag. This is the _correct_ way to open the files, as they will only be +opens index segment files is with a write only flag. This is the _correct_ way to open the files, as they will only be used for writes and allows different FS implementations to optimize for it. Sadly, in windows with SMB, this disables the cache manager, causing writes to be slow. This has been described in https://issues.apache.org/jira/browse/LUCENE-6176[LUCENE-6176], but it affects each and every Java program out there!. @@ -44,7 +44,7 @@ This can be configured for all indices by adding this to the `elasticsearch.yml` index.store.type: smb_nio_fs ---- -Note that setting will be applied for newly created indices. +Note that settings will be applied for newly created indices. It can also be set on a per-index basis at index creation time: diff --git a/docs/reference/commands/cli-jvm-options.asciidoc b/docs/reference/commands/cli-jvm-options.asciidoc index 546884f428c12..0428ead60b626 100644 --- a/docs/reference/commands/cli-jvm-options.asciidoc +++ b/docs/reference/commands/cli-jvm-options.asciidoc @@ -3,7 +3,7 @@ ==== JVM options CLI tools run with 64MB of heap. For most tools, this value is fine. However, if -needed this can be overriden by setting the `CLI_JAVA_OPTS` environment variable. +needed this can be overridden by setting the `CLI_JAVA_OPTS` environment variable. For example, the following increases the heap size used by the `pass:a[elasticsearch-{tool-name}]` tool to 1GB. diff --git a/docs/reference/connector/apis/connector-apis.asciidoc b/docs/reference/connector/apis/connector-apis.asciidoc index 987f82f6b4ce4..3de4483adcfd1 100644 --- a/docs/reference/connector/apis/connector-apis.asciidoc +++ b/docs/reference/connector/apis/connector-apis.asciidoc @@ -82,7 +82,7 @@ beta:[] preview::[] -*Connector Service APIs* are a subset of Connector API endpoints, that represent framework-level operations defined in the https://github.com/elastic/connectors/blob/main/docs/CONNECTOR_PROTOCOL.md[Connector Protocol]. These APIs are not intended for direct connector management by users but are there to support the implementation of services that utilize the Conector Protocol to communicate with {es}. +*Connector Service APIs* are a subset of Connector API endpoints, that represent framework-level operations defined in the https://github.com/elastic/connectors/blob/main/docs/CONNECTOR_PROTOCOL.md[Connector Protocol]. These APIs are not intended for direct connector management by users but are there to support the implementation of services that utilize the Connector Protocol to communicate with {es}. [TIP] ==== diff --git a/docs/reference/esql/functions/kibana/definition/top.json b/docs/reference/esql/functions/kibana/definition/top.json index 2e8e51e726588..62184326994fe 100644 --- a/docs/reference/esql/functions/kibana/definition/top.json +++ b/docs/reference/esql/functions/kibana/definition/top.json @@ -124,6 +124,30 @@ "variadic" : false, "returnType" : "ip" }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "The field to collect the top values for." + }, + { + "name" : "limit", + "type" : "integer", + "optional" : false, + "description" : "The maximum number of values to collect." + }, + { + "name" : "order", + "type" : "keyword", + "optional" : false, + "description" : "The order to calculate the top values. Either `asc` or `desc`." + } + ], + "variadic" : false, + "returnType" : "keyword" + }, { "params" : [ { @@ -147,6 +171,30 @@ ], "variadic" : false, "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "The field to collect the top values for." + }, + { + "name" : "limit", + "type" : "integer", + "optional" : false, + "description" : "The maximum number of values to collect." + }, + { + "name" : "order", + "type" : "keyword", + "optional" : false, + "description" : "The order to calculate the top values. Either `asc` or `desc`." + } + ], + "variadic" : false, + "returnType" : "text" } ], "examples" : [ diff --git a/docs/reference/esql/functions/types/top.asciidoc b/docs/reference/esql/functions/types/top.asciidoc index 0eb329c10b9ed..25d7962a27252 100644 --- a/docs/reference/esql/functions/types/top.asciidoc +++ b/docs/reference/esql/functions/types/top.asciidoc @@ -10,5 +10,7 @@ date | integer | keyword | date double | integer | keyword | double integer | integer | keyword | integer ip | integer | keyword | ip +keyword | integer | keyword | keyword long | integer | keyword | long +text | integer | keyword | text |=== diff --git a/docs/reference/ilm/actions/ilm-searchable-snapshot.asciidoc b/docs/reference/ilm/actions/ilm-searchable-snapshot.asciidoc index 4ba4782174bef..73a77bef09bde 100644 --- a/docs/reference/ilm/actions/ilm-searchable-snapshot.asciidoc +++ b/docs/reference/ilm/actions/ilm-searchable-snapshot.asciidoc @@ -19,7 +19,7 @@ index>> prefixed with `partial-` to the frozen tier. In other phases, the action In the frozen tier, the action will ignore the setting <>, if it was present in the original index, -to account for the difference in the number of nodes between the frozen and the other tiers. +to account for the difference in the number of nodes between the frozen and the other tiers. To set <> for searchable snapshots, set the `total_shards_per_node` option in the frozen phase's `searchable_snapshot` action within the ILM policy. WARNING: Don't include the `searchable_snapshot` action in both the hot and cold @@ -74,6 +74,9 @@ will be performed on the hot nodes. If using a `searchable_snapshot` action in t force merge will be performed on whatever tier the index is *prior* to the `cold` phase (either `hot` or `warm`). +`total_shards_per_node`:: +The maximum number of shards (replicas and primaries) that will be allocated to a single node for the searchable snapshot index. Defaults to unbounded. + [[ilm-searchable-snapshot-ex]] ==== Examples //// diff --git a/docs/reference/mapping/params/ignore-above.asciidoc b/docs/reference/mapping/params/ignore-above.asciidoc index 7d04bc82dcbb3..526f2d6205961 100644 --- a/docs/reference/mapping/params/ignore-above.asciidoc +++ b/docs/reference/mapping/params/ignore-above.asciidoc @@ -57,3 +57,33 @@ NOTE: The value for `ignore_above` is the _character count_, but Lucene counts bytes. If you use UTF-8 text with many non-ASCII characters, you may want to set the limit to `32766 / 4 = 8191` since UTF-8 characters may occupy at most 4 bytes. + +[[index-mapping-ignore-above]] +=== `index.mapping.ignore_above` + +The `ignore_above` setting, typically used at the field level, can also be applied at the index level using +`index.mapping.ignore_above`. This setting lets you define a maximum string length for all applicable fields across +the index, including `keyword`, `wildcard`, and keyword values in `flattened` fields. Any values that exceed this +limit will be ignored during indexing and won’t be stored. + +This index-wide setting ensures a consistent approach to managing excessively long values. It works the same as the +field-level setting—if a string’s length goes over the specified limit, that string won’t be indexed or stored. +When dealing with arrays, each element is evaluated separately, and only the elements that exceed the limit are ignored. + +[source,console] +-------------------------------------------------- +PUT my-index-000001 +{ + "settings": { + "index.mapping.ignore_above": 256 + } +} +-------------------------------------------------- + +In this example, all applicable fields in `my-index-000001` will ignore any strings longer than 256 characters. + +TIP: You can override this index-wide setting for specific fields by specifying a custom `ignore_above` value in the +field mapping. + +NOTE: Just like the field-level `ignore_above`, this setting only affects indexing and storage. The original values +are still available in the `_source` field if `_source` is enabled, which is the default behavior in Elasticsearch. diff --git a/docs/reference/modules/threadpool.asciidoc b/docs/reference/modules/threadpool.asciidoc index ed4becbfbb6d0..2d4110bdcb431 100644 --- a/docs/reference/modules/threadpool.asciidoc +++ b/docs/reference/modules/threadpool.asciidoc @@ -121,6 +121,11 @@ There are several thread pools, but the important ones include: `min(5 * (`<>`), 50)` and queue_size of `1000`. +[[modules-threadpool-esql]]`esql_worker`:: + Executes <> operations. Thread pool type is `fixed` with a + size of `int((`<> + `pass:[ * ]3) / 2) + 1`, and queue_size of `1000`. + Thread pool settings are <> and can be changed by editing `elasticsearch.yml`. Changing a specific thread pool can be done by setting its type-specific parameters; for example, changing the number of diff --git a/docs/reference/rest-api/usage.asciidoc b/docs/reference/rest-api/usage.asciidoc index a54dbe21b46c6..4a8895807f2fa 100644 --- a/docs/reference/rest-api/usage.asciidoc +++ b/docs/reference/rest-api/usage.asciidoc @@ -195,7 +195,13 @@ GET /_xpack/usage } } }, - "node_count" : 1 + "node_count" : 1, + "memory": { + anomaly_detectors_memory_bytes: 0, + data_frame_analytics_memory_bytes: 0, + pytorch_inference_memory_bytes: 0, + total_used_memory_bytes: 0 + } }, "inference": { "available" : true, diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 6d3a1a36ad407..54836ac33762d 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -81,7 +81,43 @@ retrievers) *only* the query element is allowed. [[standard-retriever-example]] ==== Example -[source,js] +//// +[source,console] +---- +PUT /restaurants +{ + "mappings": { + "properties": { + "region": { "type": "keyword" }, + "year": { "type": "keyword" }, + "vector": { + "type": "dense_vector", + "dims": 3 + } + } + } +} + +POST /restaurants/_bulk?refresh +{"index":{}} +{"region": "Austria", "year": "2019", "vector": [10, 22, 77]} +{"index":{}} +{"region": "France", "year": "2019", "vector": [10, 22, 78]} +{"index":{}} +{"region": "Austria", "year": "2020", "vector": [10, 22, 79]} +{"index":{}} +{"region": "France", "year": "2020", "vector": [10, 22, 80]} +---- +// TESTSETUP + +[source,console] +-------------------------------------------------- +DELETE /restaurants +-------------------------------------------------- +// TEARDOWN +//// + +[source,console] ---- GET /restaurants/_search { @@ -109,9 +145,8 @@ GET /restaurants/_search } } ---- -// NOTCONSOLE <1> Opens the `retriever` object. -<2> The `standard` retriever is used for definining traditional {es} queries. +<2> The `standard` retriever is used for defining traditional {es} queries. <3> The entry point for defining the search query. <4> The `bool` object allows for combining multiple query clauses logically. <5> The `should` array indicates conditions under which a document will match. Documents matching these conditions will increase their relevancy score. @@ -171,9 +206,9 @@ The parameters `query_vector` and `query_vector_builder` cannot be used together [[knn-retriever-example]] ==== Example -[source,js] +[source,console] ---- -GET my-embeddings/_search +GET /restaurants/_search { "retriever": { "knn": { <1> @@ -185,8 +220,7 @@ GET my-embeddings/_search } } ---- -// NOTCONSOLE - +// TEST[continued] <1> Configuration for k-nearest neighbor (knn) search, which is based on vector similarity. <2> Specifies the field name that contains the vectors. <3> The query vector against which document vectors are compared in the `knn` search. @@ -223,7 +257,7 @@ the retriever tree. A simple hybrid search example (lexical search + dense vector search) combining a `standard` retriever with a `knn` retriever using RRF: -[source,js] +[source,console] ---- GET /restaurants/_search { @@ -234,7 +268,7 @@ GET /restaurants/_search "standard": { <3> "query": { "multi_match": { - "query": "San Francisco", + "query": "Austria", "fields": [ "city", "region" @@ -258,7 +292,7 @@ GET /restaurants/_search } } ---- -// NOTCONSOLE +// TEST[continued] <1> Defines a retriever tree with an RRF retriever. <2> The sub-retriever array. <3> The first sub-retriever is a `standard` retriever. @@ -272,7 +306,7 @@ GET /restaurants/_search A more complex hybrid search example (lexical search + ELSER sparse vector search + dense vector search) using RRF: -[source,js] +[source,console] ---- GET movies/_search { @@ -316,7 +350,7 @@ GET movies/_search } } ---- -// NOTCONSOLE +// TEST[skip:uses ELSER] [[text-similarity-reranker-retriever]] ==== Text Similarity Re-ranker Retriever @@ -390,7 +424,7 @@ A text similarity re-ranker retriever is a compound retriever. Child retrievers This example enables out-of-the-box semantic search by re-ranking top documents using the Cohere Rerank API. This approach eliminate the need to generate and store embeddings for all indexed documents. This requires a <> using the `rerank` task type. -[source,js] +[source,console] ---- GET /index/_search { @@ -414,7 +448,7 @@ GET /index/_search } } ---- -// NOTCONSOLE +// TEST[skip:uses ML] [discrete] [[text-similarity-reranker-retriever-example-eland]] @@ -452,7 +486,7 @@ eland_import_hub_model \ + . Create an inference endpoint for the `rerank` task + -[source,js] +[source,console] ---- PUT _inference/rerank/my-msmarco-minilm-model { @@ -464,11 +498,11 @@ PUT _inference/rerank/my-msmarco-minilm-model } } ---- -// NOTCONSOLE +// TEST[skip:uses ML] + . Define a `text_similarity_rerank` retriever. + -[source,js] +[source,console] ---- POST movies/_search { @@ -490,7 +524,7 @@ POST movies/_search } } ---- -// NOTCONSOLE +// TEST[skip:uses ML] + This retriever uses a standard `match` query to search the `movie` index for films tagged with the genre "drama". It then re-ranks the results based on semantic similarity to the text in the `inference_text` parameter, using the model we uploaded to {es}. diff --git a/docs/reference/settings/security-settings.asciidoc b/docs/reference/settings/security-settings.asciidoc index 7dd9d0574638c..0fc4d59e72350 100644 --- a/docs/reference/settings/security-settings.asciidoc +++ b/docs/reference/settings/security-settings.asciidoc @@ -1990,7 +1990,7 @@ idle for more than the specified timeout. The server can also set the `Keep-Alive` HTTP response header. The effective time-to-live value is the smaller value between this setting and the `Keep-Alive` -reponse header. Configure this setting to `-1` to let the server dictate the value. +response header. Configure this setting to `-1` to let the server dictate the value. If the header is not set by the server and the setting has value of `-1`, the time-to-live is infinite and connections never expire. // end::oidc-http-connection-pool-ttl-tag[] diff --git a/docs/reference/tab-widgets/troubleshooting/snapshot/corrupt-repository.asciidoc b/docs/reference/tab-widgets/troubleshooting/snapshot/corrupt-repository.asciidoc index b2e864aab6db9..942b0f6ba21a6 100644 --- a/docs/reference/tab-widgets/troubleshooting/snapshot/corrupt-repository.asciidoc +++ b/docs/reference/tab-widgets/troubleshooting/snapshot/corrupt-repository.asciidoc @@ -71,7 +71,7 @@ GET _snapshot/my-repo ---- // TEST[skip:we're not setting up repos in these tests] + -The reponse will look like this: +The response will look like this: + [source,console-result] ---- diff --git a/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4IncrementalRequestHandlingIT.java b/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4IncrementalRequestHandlingIT.java index 2b9c77b17bced..26d31b941f356 100644 --- a/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4IncrementalRequestHandlingIT.java +++ b/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4IncrementalRequestHandlingIT.java @@ -451,8 +451,7 @@ private void assertHttpBodyLogging(Function test) throws Exceptio "request end", HttpBodyTracer.class.getCanonicalName(), Level.TRACE, - "* request body (gzip compressed, base64-encoded, and split into * parts on preceding log lines; for details see " - + "https://www.elastic.co/guide/en/elasticsearch/reference/master/modules-network.html#http-rest-request-tracer)" + "* request body (gzip compressed, base64-encoded, and split into * parts on preceding log lines;*)" ) ); } diff --git a/muted-tests.yml b/muted-tests.yml index 251de0b060b2f..2a2f8071482d8 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -244,21 +244,12 @@ tests: - class: org.elasticsearch.smoketest.MlWithSecurityIT method: test {yaml=ml/sparse_vector_search/Test sparse_vector search with query vector and pruning config} issue: https://github.com/elastic/elasticsearch/issues/108997 -- class: org.elasticsearch.index.mapper.DoubleRangeFieldMapperTests - method: testSyntheticSourceKeepArrays - issue: https://github.com/elastic/elasticsearch/issues/113217 - class: org.elasticsearch.packaging.test.WindowsServiceTests method: test80JavaOptsInEnvVar issue: https://github.com/elastic/elasticsearch/issues/113219 -- class: org.elasticsearch.index.mapper.FloatRangeFieldMapperTests - method: testSyntheticSourceKeepArrays - issue: https://github.com/elastic/elasticsearch/issues/113220 - class: org.elasticsearch.xpack.esql.expression.function.aggregate.AvgTests method: "testFold {TestCase= #2}" issue: https://github.com/elastic/elasticsearch/issues/113225 -- class: org.elasticsearch.index.mapper.DoubleRangeFieldMapperTests - method: testSyntheticSourceKeepAll - issue: https://github.com/elastic/elasticsearch/issues/113234 - class: org.elasticsearch.integration.KibanaUserRoleIntegTests method: testGetMappings issue: https://github.com/elastic/elasticsearch/issues/113260 @@ -277,33 +268,21 @@ tests: - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=wildcard/10_wildcard_basic/Query_string wildcard query} issue: https://github.com/elastic/elasticsearch/issues/113316 -- class: org.elasticsearch.index.mapper.LongRangeFieldMapperTests - method: testSyntheticSourceKeepAll - issue: https://github.com/elastic/elasticsearch/issues/113324 - class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT method: test {p0=mtermvectors/10_basic/Tests catching other exceptions per item} issue: https://github.com/elastic/elasticsearch/issues/113325 -- class: org.elasticsearch.index.mapper.IntegerRangeFieldMapperTests - method: testSyntheticSourceKeepArrays - issue: https://github.com/elastic/elasticsearch/issues/113326 - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=transform/transforms_force_delete/Test force deleting a running transform} issue: https://github.com/elastic/elasticsearch/issues/113327 - class: org.elasticsearch.integration.KibanaUserRoleIntegTests method: testValidateQuery issue: https://github.com/elastic/elasticsearch/issues/113328 -- class: org.elasticsearch.index.mapper.LongRangeFieldMapperTests - method: testSyntheticSourceKeepArrays - issue: https://github.com/elastic/elasticsearch/issues/113335 - class: org.elasticsearch.xpack.security.support.SecurityIndexManagerIntegTests method: testOnIndexAvailableForSearchIndexAlreadyAvailable issue: https://github.com/elastic/elasticsearch/issues/113336 - class: org.elasticsearch.xpack.security.authz.SecurityScrollTests method: testScrollIsPerUser issue: https://github.com/elastic/elasticsearch/issues/113338 -- class: org.elasticsearch.index.mapper.IntegerRangeFieldMapperTests - method: testSyntheticSourceKeepAll - issue: https://github.com/elastic/elasticsearch/issues/113339 - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=analytics/top_metrics/sort by scaled float field} issue: https://github.com/elastic/elasticsearch/issues/113340 @@ -322,6 +301,9 @@ tests: - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testDeleteJob_TimingStatsDocumentIsDeleted issue: https://github.com/elastic/elasticsearch/issues/113370 +- class: org.elasticsearch.xpack.esql.EsqlAsyncSecurityIT + method: testLimitedPrivilege + issue: https://github.com/elastic/elasticsearch/issues/113419 # Examples: # diff --git a/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/IncrementalBulkRestIT.java b/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/IncrementalBulkRestIT.java index 2b24e53874e51..da05011696274 100644 --- a/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/IncrementalBulkRestIT.java +++ b/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/IncrementalBulkRestIT.java @@ -29,6 +29,14 @@ @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, supportsDedicatedMasters = false, numDataNodes = 2, numClientNodes = 0) public class IncrementalBulkRestIT extends HttpSmokeTestCase { + @Override + protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal, otherSettings)) + .put(IncrementalBulkService.INCREMENTAL_BULK.getKey(), true) + .build(); + } + public void testBulkUriMatchingDoesNotMatchBulkCapabilitiesApi() throws IOException { Request request = new Request("GET", "/_capabilities?method=GET&path=%2F_bulk&capabilities=failure_store_status&pretty"); Response response = getRestClient().performRequest(request); diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/530_ignore_above_stored_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/530_ignore_above_stored_source.yml new file mode 100644 index 0000000000000..1730a49f743d9 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/530_ignore_above_stored_source.yml @@ -0,0 +1,214 @@ +--- +ignore_above mapping level setting: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + flattened: + type: flattened + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": "foo bar", "flattened": { "value": "the quick brown fox" } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: "foo bar" } + - match: { hits.hits.0._source.flattened.value: "the quick brown fox" } + - match: { hits.hits.0.fields.keyword.0: "foo bar" } + - match: { hits.hits.0.fields.flattened: null } + +--- +ignore_above mapping level setting on arrays: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + flattened: + type: flattened + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["foo bar", "the quick brown fox"], "flattened": { "value": ["the quick brown fox", "jumps over"] } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: ["foo bar", "the quick brown fox"] } + - match: { hits.hits.0._source.flattened.value: ["the quick brown fox", "jumps over"] } + - match: { hits.hits.0.fields.keyword.0: "foo bar" } + - match: { hits.hits.0.fields.flattened.0.value: "jumps over" } + +--- +ignore_above mapping overrides setting: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + ignore_above: 100 + flattened: + type: flattened + ignore_above: 100 + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": "foo bar baz foo bar baz", "flattened": { "value": "the quick brown fox" } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: { } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: "foo bar baz foo bar baz" } + - match: { hits.hits.0._source.flattened.value: "the quick brown fox" } + - match: { hits.hits.0.fields.keyword.0: "foo bar baz foo bar baz" } + - match: { hits.hits.0.fields.flattened.0.value: "the quick brown fox" } + +--- +ignore_above mapping overrides setting on arrays: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + ignore_above: 100 + flattened: + type: flattened + ignore_above: 100 + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["foo bar baz foo bar baz", "the quick brown fox jumps over"], "flattened": { "value": ["the quick brown fox", "jumps over the lazy dog"] } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: { } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: ["foo bar baz foo bar baz", "the quick brown fox jumps over"] } + - match: { hits.hits.0._source.flattened.value: ["the quick brown fox", "jumps over the lazy dog"] } + - match: { hits.hits.0.fields.keyword: ["foo bar baz foo bar baz", "the quick brown fox jumps over"] } + - match: { hits.hits.0.fields.flattened.0.value: ["the quick brown fox", "jumps over the lazy dog"] } + +--- +date ignore_above index level setting: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + date: + type: date + format: "yyyy-MM-dd'T'HH:mm:ss" + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["2023-09-17T15:30:00", "2023-09-17T15:31:00"], "date": ["2023-09-17T15:30:00", "2023-09-17T15:31:00"] } + + - do: + search: + body: + fields: + - keyword + - date + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: ["2023-09-17T15:30:00", "2023-09-17T15:31:00"] } + - match: { hits.hits.0._source.date: ["2023-09-17T15:30:00", "2023-09-17T15:31:00"] } + - match: { hits.hits.0.fields.keyword: null } + - match: { hits.hits.0.fields.date: ["2023-09-17T15:30:00","2023-09-17T15:31:00"] } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml new file mode 100644 index 0000000000000..defdc8467bf8d --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml @@ -0,0 +1,179 @@ +--- +ignore_above mapping level setting: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + flattened: + type: flattened + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": "foo bar", "flattened": { "value": "the quick brown fox" } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: "foo bar" } + - match: { hits.hits.0._source.flattened.value: "the quick brown fox" } + - match: { hits.hits.0.fields.keyword.0: "foo bar" } + +--- +ignore_above mapping level setting on arrays: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + flattened: + type: flattened + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["foo bar", "the quick brown fox"], "flattened": { "value": ["the quick brown fox", "jumps over"] } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: {} + + - length: { hits.hits: 1 } + #TODO: synthetic source field reconstruction bug (TBD: add link to the issue here) + #- match: { hits.hits.0._source.keyword: ["foo bar", "the quick brown fox"] } + - match: { hits.hits.0._source.flattened.value: ["the quick brown fox", "jumps over"] } + - match: { hits.hits.0.fields.keyword.0: "foo bar" } + - match: { hits.hits.0.fields.flattened.0.value: "jumps over" } + +--- +ignore_above mapping overrides setting: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + ignore_above: 100 + flattened: + type: flattened + ignore_above: 100 + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": "foo bar baz foo bar baz", "flattened": { "value": "the quick brown fox" } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: { } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: "foo bar baz foo bar baz" } + - match: { hits.hits.0._source.flattened.value: "the quick brown fox" } + - match: { hits.hits.0.fields.keyword.0: "foo bar baz foo bar baz" } + - match: { hits.hits.0.fields.flattened.0.value: "the quick brown fox" } + +--- +ignore_above mapping overrides setting on arrays: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + ignore_above: 100 + flattened: + type: flattened + ignore_above: 100 + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["foo bar baz foo bar baz", "the quick brown fox jumps over"], "flattened": { "value": ["the quick brown fox", "jumps over the lazy dog"] } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: { } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: ["foo bar baz foo bar baz", "the quick brown fox jumps over"] } + - match: { hits.hits.0._source.flattened.value: ["jumps over the lazy dog", "the quick brown fox"] } + - match: { hits.hits.0.fields.keyword: ["foo bar baz foo bar baz", "the quick brown fox jumps over"] } + - match: { hits.hits.0.fields.flattened.0.value: ["jumps over the lazy dog", "the quick brown fox"] } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/550_ignore_above_invalid.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/550_ignore_above_invalid.yml new file mode 100644 index 0000000000000..3c29845871fe7 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/550_ignore_above_invalid.yml @@ -0,0 +1,63 @@ +--- +ignore_above index setting negative value: + - do: + catch: bad_request + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: -1 + mappings: + properties: + keyword: + type: keyword + +--- +keyword ignore_above mapping setting negative value: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + catch: bad_request + indices.create: + index: test + body: + mappings: + properties: + keyword: + ignore_above: -2 + type: keyword + +--- +flattened ignore_above mapping setting negative value: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + catch: bad_request + indices.create: + index: test + body: + mappings: + properties: + flattened: + ignore_above: -2 + type: flattened + +--- +wildcard ignore_above mapping setting negative value: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + catch: bad_request + indices.create: + index: test + body: + mappings: + properties: + wildcard: + ignore_above: -2 + type: wildcard diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 5292f5cd9f9cb..73c42fcce8f60 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -221,7 +221,9 @@ static TransportVersion def(int id) { public static final TransportVersion BULK_INCREMENTAL_STATE = def(8_745_00_0); public static final TransportVersion FAILURE_STORE_STATUS_IN_INDEX_RESPONSE = def(8_746_00_0); public static final TransportVersion ESQL_AGGREGATION_OPERATOR_STATUS_FINISH_NANOS = def(8_747_00_0); - public static final TransportVersion ML_INFERENCE_CHUNKING_SETTINGS = def(8_748_00_0); + public static final TransportVersion ML_TELEMETRY_MEMORY_ADDED = def(8_748_00_0); + public static final TransportVersion ILM_ADD_SEARCHABLE_SNAPSHOT_TOTAL_SHARDS_PER_NODE = def(8_749_00_0); + public static final TransportVersion ML_INFERENCE_CHUNKING_SETTINGS = def(8_750_00_0); /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/close/CloseIndexClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/close/CloseIndexClusterStateUpdateRequest.java index 35e9b42a97ebc..de8db5c025d13 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/close/CloseIndexClusterStateUpdateRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/close/CloseIndexClusterStateUpdateRequest.java @@ -9,35 +9,25 @@ package org.elasticsearch.action.admin.indices.close; import org.elasticsearch.action.support.ActiveShardCount; -import org.elasticsearch.cluster.ack.IndicesClusterStateUpdateRequest; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.Index; + +import java.util.Objects; /** * Cluster state update request that allows to close one or more indices */ -public class CloseIndexClusterStateUpdateRequest extends IndicesClusterStateUpdateRequest { - - private long taskId; - private ActiveShardCount waitForActiveShards = ActiveShardCount.DEFAULT; - - public CloseIndexClusterStateUpdateRequest(final long taskId) { - this.taskId = taskId; - } - - public long taskId() { - return taskId; - } - - public CloseIndexClusterStateUpdateRequest taskId(final long taskId) { - this.taskId = taskId; - return this; - } - - public ActiveShardCount waitForActiveShards() { - return waitForActiveShards; - } - - public CloseIndexClusterStateUpdateRequest waitForActiveShards(final ActiveShardCount waitForActiveShards) { - this.waitForActiveShards = waitForActiveShards; - return this; +public record CloseIndexClusterStateUpdateRequest( + TimeValue masterNodeTimeout, + TimeValue ackTimeout, + long taskId, + ActiveShardCount waitForActiveShards, + Index[] indices +) { + public CloseIndexClusterStateUpdateRequest { + Objects.requireNonNull(masterNodeTimeout); + Objects.requireNonNull(ackTimeout); + Objects.requireNonNull(waitForActiveShards); + Objects.requireNonNull(indices); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/close/TransportCloseIndexAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/close/TransportCloseIndexAction.java index 2d1e7805fa59a..5a4292804fd6c 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/close/TransportCloseIndexAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/close/TransportCloseIndexAction.java @@ -120,9 +120,13 @@ protected void masterOperation( return; } - final CloseIndexClusterStateUpdateRequest closeRequest = new CloseIndexClusterStateUpdateRequest(task.getId()).ackTimeout( - request.ackTimeout() - ).masterNodeTimeout(request.masterNodeTimeout()).waitForActiveShards(request.waitForActiveShards()).indices(concreteIndices); + final CloseIndexClusterStateUpdateRequest closeRequest = new CloseIndexClusterStateUpdateRequest( + request.masterNodeTimeout(), + request.ackTimeout(), + task.getId(), + request.waitForActiveShards(), + concreteIndices + ); indexStateService.closeIndices(closeRequest, listener.delegateResponse((delegatedListener, t) -> { logger.debug(() -> "failed to close indices [" + Arrays.toString(concreteIndices) + "]", t); delegatedListener.onFailure(t); diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/PutMappingClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/PutMappingClusterStateUpdateRequest.java index 8a75faf36c58c..2c469f4cae490 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/PutMappingClusterStateUpdateRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/PutMappingClusterStateUpdateRequest.java @@ -9,33 +9,37 @@ package org.elasticsearch.action.admin.indices.mapping.put; -import org.elasticsearch.cluster.ack.IndicesClusterStateUpdateRequest; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.Index; import java.io.IOException; +import java.util.Objects; /** * Cluster state update request that allows to put a mapping */ -public class PutMappingClusterStateUpdateRequest extends IndicesClusterStateUpdateRequest { - - private final CompressedXContent source; - private boolean autoUpdate; - - public PutMappingClusterStateUpdateRequest(String source) throws IOException { - this.source = CompressedXContent.fromJSON(source); - } - - public CompressedXContent source() { - return source; - } - - public PutMappingClusterStateUpdateRequest autoUpdate(boolean autoUpdate) { - this.autoUpdate = autoUpdate; - return this; +public record PutMappingClusterStateUpdateRequest( + TimeValue masterNodeTimeout, + TimeValue ackTimeout, + CompressedXContent source, + boolean autoUpdate, + Index[] indices +) { + public PutMappingClusterStateUpdateRequest { + Objects.requireNonNull(masterNodeTimeout); + Objects.requireNonNull(ackTimeout); + Objects.requireNonNull(source); + Objects.requireNonNull(indices); } - public boolean autoUpdate() { - return autoUpdate; + public PutMappingClusterStateUpdateRequest( + TimeValue masterNodeTimeout, + TimeValue ackTimeout, + String source, + boolean autoUpdate, + Index... indices + ) throws IOException { + this(masterNodeTimeout, ackTimeout, CompressedXContent.fromJSON(source), autoUpdate, indices); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/TransportPutMappingAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/TransportPutMappingAction.java index bb305c0d827fd..749470e181deb 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/TransportPutMappingAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/TransportPutMappingAction.java @@ -157,10 +157,13 @@ static void performMappingUpdate( }); final PutMappingClusterStateUpdateRequest updateRequest; try { - updateRequest = new PutMappingClusterStateUpdateRequest(request.source()).indices(concreteIndices) - .ackTimeout(request.ackTimeout()) - .masterNodeTimeout(request.masterNodeTimeout()) - .autoUpdate(autoUpdate); + updateRequest = new PutMappingClusterStateUpdateRequest( + request.masterNodeTimeout(), + request.ackTimeout(), + request.source(), + autoUpdate, + concreteIndices + ); } catch (IOException e) { wrappedListener.onFailure(e); return; diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/open/OpenIndexClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/open/OpenIndexClusterStateUpdateRequest.java index 5967c9923be13..92e2433a9b1cc 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/open/OpenIndexClusterStateUpdateRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/open/OpenIndexClusterStateUpdateRequest.java @@ -9,25 +9,24 @@ package org.elasticsearch.action.admin.indices.open; import org.elasticsearch.action.support.ActiveShardCount; -import org.elasticsearch.cluster.ack.IndicesClusterStateUpdateRequest; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.Index; + +import java.util.Objects; /** * Cluster state update request that allows to open one or more indices */ -public class OpenIndexClusterStateUpdateRequest extends IndicesClusterStateUpdateRequest { - - private ActiveShardCount waitForActiveShards = ActiveShardCount.DEFAULT; - - public OpenIndexClusterStateUpdateRequest() { - - } - - public ActiveShardCount waitForActiveShards() { - return waitForActiveShards; - } - - public OpenIndexClusterStateUpdateRequest waitForActiveShards(ActiveShardCount waitForActiveShards) { - this.waitForActiveShards = waitForActiveShards; - return this; +public record OpenIndexClusterStateUpdateRequest( + TimeValue masterNodeTimeout, + TimeValue ackTimeout, + ActiveShardCount waitForActiveShards, + Index[] indices +) { + public OpenIndexClusterStateUpdateRequest { + Objects.requireNonNull(masterNodeTimeout); + Objects.requireNonNull(ackTimeout); + Objects.requireNonNull(waitForActiveShards); + Objects.requireNonNull(indices); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/open/TransportOpenIndexAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/open/TransportOpenIndexAction.java index 1184ad66eae0f..ffaa50f7c0f29 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/open/TransportOpenIndexAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/open/TransportOpenIndexAction.java @@ -90,23 +90,26 @@ protected void masterOperation( listener.onResponse(new OpenIndexResponse(true, true)); return; } - OpenIndexClusterStateUpdateRequest updateRequest = new OpenIndexClusterStateUpdateRequest().ackTimeout(request.ackTimeout()) - .masterNodeTimeout(request.masterNodeTimeout()) - .indices(concreteIndices) - .waitForActiveShards(request.waitForActiveShards()); - indexStateService.openIndices(updateRequest, new ActionListener<>() { + indexStateService.openIndices( + new OpenIndexClusterStateUpdateRequest( + request.masterNodeTimeout(), + request.ackTimeout(), + request.waitForActiveShards(), + concreteIndices + ), + new ActionListener<>() { + @Override + public void onResponse(ShardsAcknowledgedResponse response) { + listener.onResponse(new OpenIndexResponse(response.isAcknowledged(), response.isShardsAcknowledged())); + } - @Override - public void onResponse(ShardsAcknowledgedResponse response) { - listener.onResponse(new OpenIndexResponse(response.isAcknowledged(), response.isShardsAcknowledged())); + @Override + public void onFailure(Exception t) { + logger.debug(() -> "failed to open indices [" + Arrays.toString(concreteIndices) + "]", t); + listener.onFailure(t); + } } - - @Override - public void onFailure(Exception t) { - logger.debug(() -> "failed to open indices [" + Arrays.toString(concreteIndices) + "]", t); - listener.onFailure(t); - } - }); + ); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/AddIndexBlockClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/AddIndexBlockClusterStateUpdateRequest.java index beaf561bfee56..50bd3b37b4cb3 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/AddIndexBlockClusterStateUpdateRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/AddIndexBlockClusterStateUpdateRequest.java @@ -8,32 +8,26 @@ */ package org.elasticsearch.action.admin.indices.readonly; -import org.elasticsearch.cluster.ack.IndicesClusterStateUpdateRequest; import org.elasticsearch.cluster.metadata.IndexMetadata.APIBlock; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.Index; + +import java.util.Objects; /** * Cluster state update request that allows to add a block to one or more indices */ -public class AddIndexBlockClusterStateUpdateRequest extends IndicesClusterStateUpdateRequest { - - private final APIBlock block; - private long taskId; - - public AddIndexBlockClusterStateUpdateRequest(final APIBlock block, final long taskId) { - this.block = block; - this.taskId = taskId; - } - - public long taskId() { - return taskId; - } - - public APIBlock getBlock() { - return block; - } - - public AddIndexBlockClusterStateUpdateRequest taskId(final long taskId) { - this.taskId = taskId; - return this; +public record AddIndexBlockClusterStateUpdateRequest( + TimeValue masterNodeTimeout, + TimeValue ackTimeout, + APIBlock block, + long taskId, + Index[] indices +) { + public AddIndexBlockClusterStateUpdateRequest { + Objects.requireNonNull(masterNodeTimeout); + Objects.requireNonNull(ackTimeout); + Objects.requireNonNull(block); + Objects.requireNonNull(indices); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/TransportAddIndexBlockAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/TransportAddIndexBlockAction.java index 2b8f832b8aafd..867cd80fb68d0 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/TransportAddIndexBlockAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/TransportAddIndexBlockAction.java @@ -102,13 +102,18 @@ protected void masterOperation( return; } - final AddIndexBlockClusterStateUpdateRequest addBlockRequest = new AddIndexBlockClusterStateUpdateRequest( - request.getBlock(), - task.getId() - ).ackTimeout(request.ackTimeout()).masterNodeTimeout(request.masterNodeTimeout()).indices(concreteIndices); - indexStateService.addIndexBlock(addBlockRequest, listener.delegateResponse((delegatedListener, t) -> { - logger.debug(() -> "failed to mark indices as readonly [" + Arrays.toString(concreteIndices) + "]", t); - delegatedListener.onFailure(t); - })); + indexStateService.addIndexBlock( + new AddIndexBlockClusterStateUpdateRequest( + request.masterNodeTimeout(), + request.ackTimeout(), + request.getBlock(), + task.getId(), + concreteIndices + ), + listener.delegateResponse((delegatedListener, t) -> { + logger.debug(() -> "failed to mark indices as readonly [" + Arrays.toString(concreteIndices) + "]", t); + delegatedListener.onFailure(t); + }) + ); } } diff --git a/server/src/main/java/org/elasticsearch/action/bulk/IncrementalBulkService.java b/server/src/main/java/org/elasticsearch/action/bulk/IncrementalBulkService.java index 7185c4d76265e..fc264de35f510 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/IncrementalBulkService.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/IncrementalBulkService.java @@ -36,7 +36,7 @@ public class IncrementalBulkService { public static final Setting INCREMENTAL_BULK = boolSetting( "rest.incremental_bulk", - true, + false, Setting.Property.NodeScope, Setting.Property.Dynamic ); diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexStateService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexStateService.java index 00e7d2b05f2a3..0c33878b01229 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexStateService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexStateService.java @@ -470,7 +470,7 @@ public void addIndexBlock(AddIndexBlockClusterStateUpdateRequest request, Action } addBlocksQueue.submitTask( - "add-index-block-[" + request.getBlock().name + "]-" + Arrays.toString(concreteIndices), + "add-index-block-[" + request.block().name + "]-" + Arrays.toString(concreteIndices), new AddBlocksTask(request, listener), request.masterNodeTimeout() ); @@ -480,7 +480,7 @@ private class AddBlocksExecutor extends SimpleBatchedExecutor> executeTask(AddBlocksTask task, ClusterState clusterState) { - return addIndexBlock(task.request.indices(), clusterState, task.request.getBlock()); + return addIndexBlock(task.request.indices(), clusterState, task.request.block()); } @Override @@ -497,7 +497,7 @@ public void taskSucceeded(AddBlocksTask task, Map blockedIn .delegateFailure( (delegate2, verifyResults) -> finalizeBlocksQueue.submitTask( "finalize-index-block-[" - + task.request.getBlock().name + + task.request.block().name + "]-[" + blockedIndices.keySet().stream().map(Index::getName).collect(Collectors.joining(", ")) + "]", @@ -529,7 +529,7 @@ public Tuple> executeTask(FinalizeBlocksTask clusterState, task.blockedIndices, task.verifyResults, - task.request.getBlock() + task.request.block() ); assert finalizeResult.v2().size() == task.verifyResults.size(); return finalizeResult; @@ -797,9 +797,7 @@ private void sendVerifyShardBlockRequest( block, parentTaskId ); - if (request.ackTimeout() != null) { - shardRequest.timeout(request.ackTimeout()); - } + shardRequest.timeout(request.ackTimeout()); client.executeLocally(TransportVerifyShardIndexBlockAction.TYPE, shardRequest, listener); } } diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 69abb59689c00..ad3d7d7f1c2ec 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -151,6 +151,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { IndexSettings.INDEX_SEARCH_IDLE_AFTER, IndexSettings.INDEX_SEARCH_THROTTLED, IndexFieldDataService.INDEX_FIELDDATA_CACHE_KEY, + IndexSettings.IGNORE_ABOVE_SETTING, FieldMapper.IGNORE_MALFORMED_SETTING, FieldMapper.COERCE_SETTING, Store.INDEX_STORE_STATS_REFRESH_INTERVAL_SETTING, diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 92abdb39acf56..f6ad5e22b9ed7 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -25,6 +25,7 @@ import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.translog.Translog; @@ -697,6 +698,31 @@ public Iterator> settings() { Property.IndexSettingDeprecatedInV7AndRemovedInV8 ); + /** + * The `index.mapping.ignore_above` setting defines the maximum length for the content of a field that will be indexed + * or stored. If the length of the field’s content exceeds this limit, the field value will be ignored during indexing. + * This setting is useful for `keyword`, `flattened`, and `wildcard` fields where very large values are undesirable. + * It allows users to manage the size of indexed data by skipping fields with excessively long content. As an index-level + * setting, it applies to all `keyword` and `wildcard` fields, as well as to keyword values within `flattened` fields. + * When it comes to arrays, the `ignore_above` setting applies individually to each element of the array. If any element's + * length exceeds the specified limit, only that element will be ignored during indexing, while the rest of the array will + * still be processed. This behavior is consistent with the field-level `ignore_above` setting. + * This setting can be overridden at the field level by specifying a custom `ignore_above` value in the field mapping. + *

+ * Example usage: + *

+     * "index.mapping.ignore_above": 256
+     * 
+ */ + public static final Setting IGNORE_ABOVE_SETTING = Setting.intSetting( + "index.mapping.ignore_above", + Integer.MAX_VALUE, + 0, + Property.IndexScope, + Property.ServerlessPublic + ); + public static final NodeFeature IGNORE_ABOVE_INDEX_LEVEL_SETTING = new NodeFeature("mapper.ignore_above_index_level_setting"); + private final Index index; private final IndexVersion version; private final Logger logger; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 2da8d32773733..46b1dbdce4c4b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -79,6 +79,7 @@ import static org.apache.lucene.index.IndexWriter.MAX_TERM_LENGTH; import static org.elasticsearch.core.Strings.format; +import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING; /** * A field mapper for keywords. This mapper accepts strings and indexes them as-is. @@ -110,8 +111,6 @@ public static class Defaults { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER ); - - public static final int IGNORE_ABOVE = Integer.MAX_VALUE; } public static class KeywordField extends Field { @@ -158,12 +157,8 @@ public static final class Builder extends FieldMapper.DimensionBuilder { m -> toType(m).fieldType().eagerGlobalOrdinals(), false ); - private final Parameter ignoreAbove = Parameter.intParam( - "ignore_above", - true, - m -> toType(m).fieldType().ignoreAbove(), - Defaults.IGNORE_ABOVE - ); + private final Parameter ignoreAbove; + private final int ignoreAboveDefault; private final Parameter indexOptions = TextParams.keywordIndexOptions(m -> toType(m).indexOptions); private final Parameter hasNorms = TextParams.norms(false, m -> toType(m).fieldType.omitNorms() == false); @@ -193,7 +188,23 @@ public static final class Builder extends FieldMapper.DimensionBuilder { private final ScriptCompiler scriptCompiler; private final IndexVersion indexCreatedVersion; - public Builder(String name, IndexAnalyzers indexAnalyzers, ScriptCompiler scriptCompiler, IndexVersion indexCreatedVersion) { + public Builder(final String name, final MappingParserContext mappingParserContext) { + this( + name, + mappingParserContext.getIndexAnalyzers(), + mappingParserContext.scriptCompiler(), + IGNORE_ABOVE_SETTING.get(mappingParserContext.getSettings()), + mappingParserContext.getIndexSettings().getIndexVersionCreated() + ); + } + + Builder( + String name, + IndexAnalyzers indexAnalyzers, + ScriptCompiler scriptCompiler, + int ignoreAboveDefault, + IndexVersion indexCreatedVersion + ) { super(name); this.indexAnalyzers = indexAnalyzers; this.scriptCompiler = Objects.requireNonNull(scriptCompiler); @@ -220,10 +231,17 @@ public Builder(String name, IndexAnalyzers indexAnalyzers, ScriptCompiler script ); } }).precludesParameters(normalizer); + this.ignoreAboveDefault = ignoreAboveDefault; + this.ignoreAbove = Parameter.intParam("ignore_above", true, m -> toType(m).fieldType().ignoreAbove(), ignoreAboveDefault) + .addValidator(v -> { + if (v < 0) { + throw new IllegalArgumentException("[ignore_above] must be positive, got [" + v + "]"); + } + }); } public Builder(String name, IndexVersion indexCreatedVersion) { - this(name, null, ScriptCompiler.NONE, indexCreatedVersion); + this(name, null, ScriptCompiler.NONE, Integer.MAX_VALUE, indexCreatedVersion); } public Builder ignoreAbove(int ignoreAbove) { @@ -370,10 +388,7 @@ public KeywordFieldMapper build(MapperBuilderContext context) { private static final IndexVersion MINIMUM_COMPATIBILITY_VERSION = IndexVersion.fromId(5000099); - public static final TypeParser PARSER = new TypeParser( - (n, c) -> new Builder(n, c.getIndexAnalyzers(), c.scriptCompiler(), c.indexVersionCreated()), - MINIMUM_COMPATIBILITY_VERSION - ); + public static final TypeParser PARSER = new TypeParser(Builder::new, MINIMUM_COMPATIBILITY_VERSION); public static final class KeywordFieldType extends StringFieldType { @@ -865,6 +880,8 @@ public boolean hasNormalizer() { private final boolean isSyntheticSource; private final IndexAnalyzers indexAnalyzers; + private final int ignoreAboveDefault; + private final int ignoreAbove; private KeywordFieldMapper( String simpleName, @@ -887,6 +904,8 @@ private KeywordFieldMapper( this.scriptCompiler = builder.scriptCompiler; this.indexCreatedVersion = builder.indexCreatedVersion; this.isSyntheticSource = isSyntheticSource; + this.ignoreAboveDefault = builder.ignoreAboveDefault; + this.ignoreAbove = builder.ignoreAbove.getValue(); } @Override @@ -1004,7 +1023,9 @@ public Map indexAnalyzers() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexAnalyzers, scriptCompiler, indexCreatedVersion).dimension(fieldType().isDimension()).init(this); + return new Builder(leafName(), indexAnalyzers, scriptCompiler, ignoreAboveDefault, indexCreatedVersion).dimension( + fieldType().isDimension() + ).init(this); } @Override @@ -1072,7 +1093,7 @@ protected BytesRef preserve(BytesRef value) { }); } - if (fieldType().ignoreAbove != Defaults.IGNORE_ABOVE) { + if (fieldType().ignoreAbove != ignoreAboveDefault) { layers.add(new CompositeSyntheticFieldLoader.StoredFieldLayer(originalName()) { @Override protected void writeValue(Object value, XContentBuilder b) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index d18c3283ef909..d2ca7a24a78fd 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -11,6 +11,7 @@ import org.elasticsearch.features.FeatureSpecification; import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; @@ -41,6 +42,7 @@ public Set getFeatures() { SourceFieldMapper.SYNTHETIC_SOURCE_WITH_COPY_TO_AND_DOC_VALUES_FALSE_SUPPORT, SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_FIX, FlattenedFieldMapper.IGNORE_ABOVE_SUPPORT, + IndexSettings.IGNORE_ABOVE_INDEX_LEVEL_SETTING, SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_INSIDE_OBJECTS_FIX ); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java index 43890abf25aa7..2c504262c35ad 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java @@ -82,6 +82,8 @@ import java.util.Set; import java.util.function.Function; +import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING; + /** * A field mapper that accepts a JSON object and flattens it into a single field. This data type * can be a useful alternative to an 'object' mapping when the object has a large, unknown set @@ -123,6 +125,9 @@ private static Builder builder(Mapper in) { return ((FlattenedFieldMapper) in).builder; } + private final int ignoreAboveDefault; + private final int ignoreAbove; + public static class Builder extends FieldMapper.Builder { final Parameter depthLimit = Parameter.intParam( @@ -148,12 +153,8 @@ public static class Builder extends FieldMapper.Builder { m -> builder(m).eagerGlobalOrdinals.get(), false ); - private final Parameter ignoreAbove = Parameter.intParam( - "ignore_above", - true, - m -> builder(m).ignoreAbove.get(), - Integer.MAX_VALUE - ); + private final int ignoreAboveDefault; + private final Parameter ignoreAbove; private final Parameter indexOptions = TextParams.keywordIndexOptions(m -> builder(m).indexOptions.get()); private final Parameter similarity = TextParams.similarity(m -> builder(m).similarity.get()); @@ -176,7 +177,7 @@ public static class Builder extends FieldMapper.Builder { + "] are true" ); } - }).precludesParameters(ignoreAbove); + }); private final Parameter> meta = Parameter.metaParam(); @@ -184,8 +185,20 @@ public static FieldMapper.Parameter> dimensionsParam(Function builder(m).ignoreAbove.get(), ignoreAboveDefault) + .addValidator(v -> { + if (v < 0) { + throw new IllegalArgumentException("[ignore_above] must be positive, got [" + v + "]"); + } + }); + this.dimensions.precludesParameters(ignoreAbove); } @Override @@ -223,11 +236,11 @@ public FlattenedFieldMapper build(MapperBuilderContext context) { dimensions.get(), ignoreAbove.getValue() ); - return new FlattenedFieldMapper(leafName(), ft, builderParams(this, context), this); + return new FlattenedFieldMapper(leafName(), ft, builderParams(this, context), ignoreAboveDefault, this); } } - public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n)); + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, IGNORE_ABOVE_SETTING.get(c.getSettings()))); /** * A field type that represents the values under a particular JSON key, used @@ -808,9 +821,17 @@ public void validateMatchedRoutingPath(final String routingPath) { private final FlattenedFieldParser fieldParser; private final Builder builder; - private FlattenedFieldMapper(String leafName, MappedFieldType mappedFieldType, BuilderParams builderParams, Builder builder) { + private FlattenedFieldMapper( + String leafName, + MappedFieldType mappedFieldType, + BuilderParams builderParams, + int ignoreAboveDefault, + Builder builder + ) { super(leafName, mappedFieldType, builderParams); + this.ignoreAboveDefault = ignoreAboveDefault; this.builder = builder; + this.ignoreAbove = builder.ignoreAbove.get(); this.fieldParser = new FlattenedFieldParser( mappedFieldType.name(), mappedFieldType.name() + KEYED_FIELD_SUFFIX, @@ -835,8 +856,8 @@ int depthLimit() { return builder.depthLimit.get(); } - int ignoreAbove() { - return builder.ignoreAbove.get(); + public int ignoreAbove() { + return ignoreAbove; } @Override @@ -876,7 +897,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName()).init(this); + return new Builder(leafName(), ignoreAboveDefault).init(this); } @Override diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshotTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshotTests.java index 0bca6e57dc47b..e9188d9cb8f0d 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshotTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshotTests.java @@ -33,7 +33,7 @@ public class CCSTelemetrySnapshotTests extends AbstractWireSerializingTestCase> implements Comparable { builder.startObject(); - if (includeFrom && from == null && randomBoolean()) { + if (includeFrom && from == null && skipDefaultFrom) { // skip field entirely since it is equivalent to a default value } else { builder.field(fromKey, from); } - if (includeTo && to == null && randomBoolean()) { + if (includeTo && to == null && skipDefaultTo) { // skip field entirely since it is equivalent to a default value } else { builder.field(toKey, to); diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index 92e480aff3bc9..cca3443c28e3a 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -26,7 +26,6 @@ import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionResponse; import org.elasticsearch.action.ActionType; -import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainRequest; import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainResponse; @@ -49,8 +48,6 @@ import org.elasticsearch.action.admin.indices.template.put.PutIndexTemplateRequestBuilder; import org.elasticsearch.action.bulk.BulkRequestBuilder; import org.elasticsearch.action.bulk.BulkResponse; -import org.elasticsearch.action.bulk.IncrementalBulkService; -import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.ingest.DeletePipelineRequest; import org.elasticsearch.action.ingest.DeletePipelineTransportAction; @@ -196,7 +193,6 @@ import java.util.Random; import java.util.Set; import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executor; @@ -1782,48 +1778,11 @@ public void indexRandom(boolean forceRefresh, boolean dummyDocuments, boolean ma logger.info("Index [{}] docs async: [{}] bulk: [{}] partitions [{}]", builders.size(), false, true, partition.size()); for (List segmented : partition) { BulkResponse actionGet; - if (randomBoolean()) { - BulkRequestBuilder bulkBuilder = client().prepareBulk(); - for (IndexRequestBuilder indexRequestBuilder : segmented) { - bulkBuilder.add(indexRequestBuilder); - } - actionGet = bulkBuilder.get(); - } else { - IncrementalBulkService bulkService = internalCluster().getInstance(IncrementalBulkService.class); - IncrementalBulkService.Handler handler = bulkService.newBulkRequest(); - - ConcurrentLinkedQueue queue = new ConcurrentLinkedQueue<>(); - segmented.forEach(b -> queue.add(b.request())); - - PlainActionFuture future = new PlainActionFuture<>(); - AtomicInteger runs = new AtomicInteger(0); - Runnable r = new Runnable() { - - @Override - public void run() { - int toRemove = Math.min(randomIntBetween(5, 10), queue.size()); - ArrayList> docs = new ArrayList<>(); - for (int i = 0; i < toRemove; i++) { - docs.add(queue.poll()); - } - - if (queue.isEmpty()) { - handler.lastItems(docs, () -> {}, future); - } else { - handler.addItems(docs, () -> {}, () -> { - // Every 10 runs dispatch to new thread to prevent stackoverflow - if (runs.incrementAndGet() % 10 == 0) { - new Thread(this).start(); - } else { - this.run(); - } - }); - } - } - }; - r.run(); - actionGet = future.actionGet(); + BulkRequestBuilder bulkBuilder = client().prepareBulk(); + for (IndexRequestBuilder indexRequestBuilder : segmented) { + bulkBuilder.add(indexRequestBuilder); } + actionGet = bulkBuilder.get(); assertThat(actionGet.hasFailures() ? actionGet.buildFailureMessage() : "", actionGet.hasFailures(), equalTo(false)); } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStep.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStep.java index aac4d74144e95..7d045f2950e1b 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStep.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStep.java @@ -18,11 +18,13 @@ import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.xpack.core.searchablesnapshots.MountSearchableSnapshotAction; import org.elasticsearch.xpack.core.searchablesnapshots.MountSearchableSnapshotRequest; +import java.util.ArrayList; import java.util.Objects; import java.util.Optional; @@ -37,17 +39,34 @@ public class MountSnapshotStep extends AsyncRetryDuringSnapshotActionStep { private final String restoredIndexPrefix; private final MountSearchableSnapshotRequest.Storage storageType; + @Nullable + private final Integer totalShardsPerNode; public MountSnapshotStep( StepKey key, StepKey nextStepKey, Client client, String restoredIndexPrefix, - MountSearchableSnapshotRequest.Storage storageType + MountSearchableSnapshotRequest.Storage storageType, + @Nullable Integer totalShardsPerNode ) { super(key, nextStepKey, client); this.restoredIndexPrefix = restoredIndexPrefix; this.storageType = Objects.requireNonNull(storageType, "a storage type must be specified"); + if (totalShardsPerNode != null && totalShardsPerNode < 1) { + throw new IllegalArgumentException("[" + SearchableSnapshotAction.TOTAL_SHARDS_PER_NODE.getPreferredName() + "] must be >= 1"); + } + this.totalShardsPerNode = totalShardsPerNode; + } + + public MountSnapshotStep( + StepKey key, + StepKey nextStepKey, + Client client, + String restoredIndexPrefix, + MountSearchableSnapshotRequest.Storage storageType + ) { + this(key, nextStepKey, client, restoredIndexPrefix, storageType, null); } @Override @@ -63,6 +82,11 @@ public MountSearchableSnapshotRequest.Storage getStorage() { return storageType; } + @Nullable + public Integer getTotalShardsPerNode() { + return totalShardsPerNode; + } + @Override void performDuringNoSnapshot(IndexMetadata indexMetadata, ClusterState currentClusterState, ActionListener listener) { String indexName = indexMetadata.getIndex().getName(); @@ -140,6 +164,9 @@ void performDuringNoSnapshot(IndexMetadata indexMetadata, ClusterState currentCl final Settings.Builder settingsBuilder = Settings.builder(); overrideTierPreference(this.getKey().phase()).ifPresent(override -> settingsBuilder.put(DataTier.TIER_PREFERENCE, override)); + if (totalShardsPerNode != null) { + settingsBuilder.put(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey(), totalShardsPerNode); + } final MountSearchableSnapshotRequest mountSearchableSnapshotRequest = new MountSearchableSnapshotRequest( TimeValue.MAX_VALUE, @@ -148,9 +175,9 @@ void performDuringNoSnapshot(IndexMetadata indexMetadata, ClusterState currentCl snapshotName, indexName, settingsBuilder.build(), - ignoredIndexSettings(this.getKey().phase()), + ignoredIndexSettings(), // we'll not wait for the snapshot to complete in this step as the async steps are executed from threads that shouldn't - // perform expensive operations (ie. clusterStateProcessed) + // perform expensive operations (i.e. clusterStateProcessed) false, storageType ); @@ -198,23 +225,27 @@ static Optional overrideTierPreference(String phase) { * setting, the restored index would be captured by the ILM runner and, depending on what ILM execution state was captured at snapshot * time, make it's way forward from _that_ step forward in the ILM policy. We'll re-set this setting on the restored index at a later * step once we restored a deterministic execution state - * - index.routing.allocation.total_shards_per_node: It is likely that frozen tier has fewer nodes than the hot tier. - * Keeping this setting runs the risk that we will not have enough nodes to allocate all the shards in the - * frozen tier and the user does not have any way of fixing this. For this reason, we ignore this setting when moving to frozen. + * - index.routing.allocation.total_shards_per_node: It is likely that frozen tier has fewer nodes than the hot tier. If this setting + * is not specifically set in the frozen tier, keeping this setting runs the risk that we will not have enough nodes to + * allocate all the shards in the frozen tier and the user does not have any way of fixing this. For this reason, we ignore this + * setting when moving to frozen. We do not ignore this setting if it is specifically set in the mount searchable snapshot step + * of frozen tier. */ - static String[] ignoredIndexSettings(String phase) { + String[] ignoredIndexSettings() { + ArrayList ignoredSettings = new ArrayList<>(); + ignoredSettings.add(LifecycleSettings.LIFECYCLE_NAME); // if we are mounting a searchable snapshot in the hot phase, then we should not change the total_shards_per_node setting - if (TimeseriesLifecycleType.FROZEN_PHASE.equals(phase)) { - return new String[] { - LifecycleSettings.LIFECYCLE_NAME, - ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey() }; + // if total_shards_per_node setting is specifically set for the frozen phase and not propagated from previous phase, + // then it should not be ignored + if (TimeseriesLifecycleType.FROZEN_PHASE.equals(this.getKey().phase()) && this.totalShardsPerNode == null) { + ignoredSettings.add(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey()); } - return new String[] { LifecycleSettings.LIFECYCLE_NAME }; + return ignoredSettings.toArray(new String[0]); } @Override public int hashCode() { - return Objects.hash(super.hashCode(), restoredIndexPrefix, storageType); + return Objects.hash(super.hashCode(), restoredIndexPrefix, storageType, totalShardsPerNode); } @Override @@ -228,6 +259,7 @@ public boolean equals(Object obj) { MountSnapshotStep other = (MountSnapshotStep) obj; return super.equals(obj) && Objects.equals(restoredIndexPrefix, other.restoredIndexPrefix) - && Objects.equals(storageType, other.storageType); + && Objects.equals(storageType, other.storageType) + && Objects.equals(totalShardsPerNode, other.totalShardsPerNode); } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotAction.java index 5b9b559b4d957..c06dcc0f083d1 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotAction.java @@ -32,6 +32,7 @@ import java.util.List; import java.util.Objects; +import static org.elasticsearch.TransportVersions.ILM_ADD_SEARCHABLE_SNAPSHOT_TOTAL_SHARDS_PER_NODE; import static org.elasticsearch.snapshots.SearchableSnapshotsSettings.SEARCHABLE_SNAPSHOTS_REPOSITORY_NAME_SETTING_KEY; import static org.elasticsearch.snapshots.SearchableSnapshotsSettings.SEARCHABLE_SNAPSHOTS_SNAPSHOT_NAME_SETTING_KEY; import static org.elasticsearch.snapshots.SearchableSnapshotsSettings.SEARCHABLE_SNAPSHOT_PARTIAL_SETTING_KEY; @@ -49,6 +50,7 @@ public class SearchableSnapshotAction implements LifecycleAction { public static final ParseField SNAPSHOT_REPOSITORY = new ParseField("snapshot_repository"); public static final ParseField FORCE_MERGE_INDEX = new ParseField("force_merge_index"); + public static final ParseField TOTAL_SHARDS_PER_NODE = new ParseField("total_shards_per_node"); public static final String CONDITIONAL_DATASTREAM_CHECK_KEY = BranchingStep.NAME + "-on-datastream-check"; public static final String CONDITIONAL_SKIP_ACTION_STEP = BranchingStep.NAME + "-check-prerequisites"; public static final String CONDITIONAL_SKIP_GENERATE_AND_CLEAN = BranchingStep.NAME + "-check-existing-snapshot"; @@ -58,12 +60,13 @@ public class SearchableSnapshotAction implements LifecycleAction { private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( NAME, - a -> new SearchableSnapshotAction((String) a[0], a[1] == null || (boolean) a[1]) + a -> new SearchableSnapshotAction((String) a[0], a[1] == null || (boolean) a[1], (Integer) a[2]) ); static { PARSER.declareString(ConstructingObjectParser.constructorArg(), SNAPSHOT_REPOSITORY); PARSER.declareBoolean(ConstructingObjectParser.optionalConstructorArg(), FORCE_MERGE_INDEX); + PARSER.declareInt(ConstructingObjectParser.optionalConstructorArg(), TOTAL_SHARDS_PER_NODE); } public static SearchableSnapshotAction parse(XContentParser parser) { @@ -72,22 +75,36 @@ public static SearchableSnapshotAction parse(XContentParser parser) { private final String snapshotRepository; private final boolean forceMergeIndex; + @Nullable + private final Integer totalShardsPerNode; - public SearchableSnapshotAction(String snapshotRepository, boolean forceMergeIndex) { + public SearchableSnapshotAction(String snapshotRepository, boolean forceMergeIndex, @Nullable Integer totalShardsPerNode) { if (Strings.hasText(snapshotRepository) == false) { throw new IllegalArgumentException("the snapshot repository must be specified"); } this.snapshotRepository = snapshotRepository; this.forceMergeIndex = forceMergeIndex; + + if (totalShardsPerNode != null && totalShardsPerNode < 1) { + throw new IllegalArgumentException("[" + TOTAL_SHARDS_PER_NODE.getPreferredName() + "] must be >= 1"); + } + this.totalShardsPerNode = totalShardsPerNode; + } + + public SearchableSnapshotAction(String snapshotRepository, boolean forceMergeIndex) { + this(snapshotRepository, forceMergeIndex, null); } public SearchableSnapshotAction(String snapshotRepository) { - this(snapshotRepository, true); + this(snapshotRepository, true, null); } public SearchableSnapshotAction(StreamInput in) throws IOException { this.snapshotRepository = in.readString(); this.forceMergeIndex = in.readBoolean(); + this.totalShardsPerNode = in.getTransportVersion().onOrAfter(ILM_ADD_SEARCHABLE_SNAPSHOT_TOTAL_SHARDS_PER_NODE) + ? in.readOptionalInt() + : null; } boolean isForceMergeIndex() { @@ -98,6 +115,10 @@ public String getSnapshotRepository() { return snapshotRepository; } + public Integer getTotalShardsPerNode() { + return totalShardsPerNode; + } + @Override public List toSteps(Client client, String phase, StepKey nextStepKey) { assert false; @@ -298,7 +319,8 @@ public List toSteps(Client client, String phase, StepKey nextStepKey, XPac waitForGreenRestoredIndexKey, client, getRestoredIndexPrefix(mountSnapshotKey), - storageType + storageType, + totalShardsPerNode ); WaitForIndexColorStep waitForGreenIndexHealthStep = new WaitForIndexColorStep( waitForGreenRestoredIndexKey, @@ -402,6 +424,9 @@ public String getWriteableName() { public void writeTo(StreamOutput out) throws IOException { out.writeString(snapshotRepository); out.writeBoolean(forceMergeIndex); + if (out.getTransportVersion().onOrAfter(ILM_ADD_SEARCHABLE_SNAPSHOT_TOTAL_SHARDS_PER_NODE)) { + out.writeOptionalInt(totalShardsPerNode); + } } @Override @@ -409,6 +434,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(); builder.field(SNAPSHOT_REPOSITORY.getPreferredName(), snapshotRepository); builder.field(FORCE_MERGE_INDEX.getPreferredName(), forceMergeIndex); + if (totalShardsPerNode != null) { + builder.field(TOTAL_SHARDS_PER_NODE.getPreferredName(), totalShardsPerNode); + } builder.endObject(); return builder; } @@ -422,12 +450,14 @@ public boolean equals(Object o) { return false; } SearchableSnapshotAction that = (SearchableSnapshotAction) o; - return Objects.equals(snapshotRepository, that.snapshotRepository) && Objects.equals(forceMergeIndex, that.forceMergeIndex); + return Objects.equals(snapshotRepository, that.snapshotRepository) + && Objects.equals(forceMergeIndex, that.forceMergeIndex) + && Objects.equals(totalShardsPerNode, that.totalShardsPerNode); } @Override public int hashCode() { - return Objects.hash(snapshotRepository, forceMergeIndex); + return Objects.hash(snapshotRepository, forceMergeIndex, totalShardsPerNode); } @Nullable diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsage.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsage.java index 98c31dd9106d0..60484675ec90b 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsage.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsage.java @@ -31,11 +31,13 @@ public class MachineLearningFeatureSetUsage extends XPackFeatureSet.Usage { public static final String NODE_COUNT = "node_count"; public static final String DATA_FRAME_ANALYTICS_JOBS_FIELD = "data_frame_analytics_jobs"; public static final String INFERENCE_FIELD = "inference"; + public static final String MEMORY_FIELD = "memory"; private final Map jobsUsage; private final Map datafeedsUsage; private final Map analyticsUsage; private final Map inferenceUsage; + private final Map memoryUsage; private final int nodeCount; public MachineLearningFeatureSetUsage( @@ -45,6 +47,7 @@ public MachineLearningFeatureSetUsage( Map datafeedsUsage, Map analyticsUsage, Map inferenceUsage, + Map memoryUsage, int nodeCount ) { super(XPackField.MACHINE_LEARNING, available, enabled); @@ -52,6 +55,7 @@ public MachineLearningFeatureSetUsage( this.datafeedsUsage = Objects.requireNonNull(datafeedsUsage); this.analyticsUsage = Objects.requireNonNull(analyticsUsage); this.inferenceUsage = Objects.requireNonNull(inferenceUsage); + this.memoryUsage = Objects.requireNonNull(memoryUsage); this.nodeCount = nodeCount; } @@ -62,6 +66,11 @@ public MachineLearningFeatureSetUsage(StreamInput in) throws IOException { this.analyticsUsage = in.readGenericMap(); this.inferenceUsage = in.readGenericMap(); this.nodeCount = in.readInt(); + if (in.getTransportVersion().onOrAfter(TransportVersions.ML_TELEMETRY_MEMORY_ADDED)) { + this.memoryUsage = in.readGenericMap(); + } else { + this.memoryUsage = Map.of(); + } } @Override @@ -77,6 +86,9 @@ public void writeTo(StreamOutput out) throws IOException { out.writeGenericMap(analyticsUsage); out.writeGenericMap(inferenceUsage); out.writeInt(nodeCount); + if (out.getTransportVersion().onOrAfter(TransportVersions.ML_TELEMETRY_MEMORY_ADDED)) { + out.writeGenericMap(memoryUsage); + } } @Override @@ -86,9 +98,51 @@ protected void innerXContent(XContentBuilder builder, Params params) throws IOEx builder.field(DATAFEEDS_FIELD, datafeedsUsage); builder.field(DATA_FRAME_ANALYTICS_JOBS_FIELD, analyticsUsage); builder.field(INFERENCE_FIELD, inferenceUsage); + builder.field(MEMORY_FIELD, memoryUsage); if (nodeCount >= 0) { builder.field(NODE_COUNT, nodeCount); } } + public Map getJobsUsage() { + return jobsUsage; + } + + public Map getDatafeedsUsage() { + return datafeedsUsage; + } + + public Map getAnalyticsUsage() { + return analyticsUsage; + } + + public Map getInferenceUsage() { + return inferenceUsage; + } + + public Map getMemoryUsage() { + return memoryUsage; + } + + public int getNodeCount() { + return nodeCount; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + MachineLearningFeatureSetUsage that = (MachineLearningFeatureSetUsage) o; + return nodeCount == that.nodeCount + && Objects.equals(jobsUsage, that.jobsUsage) + && Objects.equals(datafeedsUsage, that.datafeedsUsage) + && Objects.equals(analyticsUsage, that.analyticsUsage) + && Objects.equals(inferenceUsage, that.inferenceUsage) + && Objects.equals(memoryUsage, that.memoryUsage); + } + + @Override + public int hashCode() { + return Objects.hash(jobsUsage, datafeedsUsage, analyticsUsage, inferenceUsage, memoryUsage, nodeCount); + } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecyclePolicyTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecyclePolicyTests.java index 66aa9a24cbcd4..7963d04e0f666 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecyclePolicyTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecyclePolicyTests.java @@ -224,7 +224,11 @@ public static LifecyclePolicy randomTimeseriesLifecyclePolicy(@Nullable String l frozenTime, Collections.singletonMap( SearchableSnapshotAction.NAME, - new SearchableSnapshotAction(randomAlphaOfLength(10), randomBoolean()) + new SearchableSnapshotAction( + randomAlphaOfLength(10), + randomBoolean(), + (randomBoolean() ? null : randomIntBetween(1, 100)) + ) ) ) ); diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStepTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStepTests.java index 2b5a0535caa0e..8ca7a00ab0948 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStepTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStepTests.java @@ -41,7 +41,8 @@ public MountSnapshotStep createRandomInstance() { StepKey nextStepKey = randomStepKey(); String restoredIndexPrefix = randomAlphaOfLength(10); MountSearchableSnapshotRequest.Storage storage = randomStorageType(); - return new MountSnapshotStep(stepKey, nextStepKey, client, restoredIndexPrefix, storage); + Integer totalShardsPerNode = randomTotalShardsPerNode(true); + return new MountSnapshotStep(stepKey, nextStepKey, client, restoredIndexPrefix, storage, totalShardsPerNode); } public static MountSearchableSnapshotRequest.Storage randomStorageType() { @@ -59,7 +60,8 @@ protected MountSnapshotStep copyInstance(MountSnapshotStep instance) { instance.getNextStepKey(), instance.getClient(), instance.getRestoredIndexPrefix(), - instance.getStorage() + instance.getStorage(), + instance.getTotalShardsPerNode() ); } @@ -69,7 +71,8 @@ public MountSnapshotStep mutateInstance(MountSnapshotStep instance) { StepKey nextKey = instance.getNextStepKey(); String restoredIndexPrefix = instance.getRestoredIndexPrefix(); MountSearchableSnapshotRequest.Storage storage = instance.getStorage(); - switch (between(0, 3)) { + Integer totalShardsPerNode = instance.getTotalShardsPerNode(); + switch (between(0, 4)) { case 0: key = new StepKey(key.phase(), key.action(), key.name() + randomAlphaOfLength(5)); break; @@ -88,10 +91,30 @@ public MountSnapshotStep mutateInstance(MountSnapshotStep instance) { throw new AssertionError("unknown storage type: " + storage); } break; + case 4: + totalShardsPerNode = totalShardsPerNode == null ? 1 : totalShardsPerNode + randomIntBetween(1, 100); + break; default: throw new AssertionError("Illegal randomisation branch"); } - return new MountSnapshotStep(key, nextKey, instance.getClient(), restoredIndexPrefix, storage); + return new MountSnapshotStep(key, nextKey, instance.getClient(), restoredIndexPrefix, storage, totalShardsPerNode); + } + + public void testCreateWithInvalidTotalShardsPerNode() throws Exception { + int invalidTotalShardsPerNode = randomIntBetween(-100, 0); + + IllegalArgumentException exception = expectThrows( + IllegalArgumentException.class, + () -> new MountSnapshotStep( + randomStepKey(), + randomStepKey(), + client, + RESTORED_INDEX_PREFIX, + randomStorageType(), + invalidTotalShardsPerNode + ) + ); + assertEquals("[total_shards_per_node] must be >= 1", exception.getMessage()); } public void testPerformActionFailure() { @@ -345,7 +368,50 @@ public void testIgnoreTotalShardsPerNodeInFrozenPhase() throws Exception { randomStepKey(), client, RESTORED_INDEX_PREFIX, - randomStorageType() + randomStorageType(), + null + ); + performActionAndWait(step, indexMetadata, clusterState, null); + } + } + + public void testDoNotIgnoreTotalShardsPerNodeIfSet() throws Exception { + String indexName = randomAlphaOfLength(10); + String policyName = "test-ilm-policy"; + Map ilmCustom = new HashMap<>(); + String snapshotName = indexName + "-" + policyName; + ilmCustom.put("snapshot_name", snapshotName); + String repository = "repository"; + ilmCustom.put("snapshot_repository", repository); + + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(indexName) + .settings(settings(IndexVersion.current()).put(LifecycleSettings.LIFECYCLE_NAME, policyName)) + .putCustom(LifecycleExecutionState.ILM_CUSTOM_METADATA_KEY, ilmCustom) + .numberOfShards(randomIntBetween(1, 5)) + .numberOfReplicas(randomIntBetween(0, 5)); + IndexMetadata indexMetadata = indexMetadataBuilder.build(); + + ClusterState clusterState = ClusterState.builder(emptyClusterState()) + .metadata(Metadata.builder().put(indexMetadata, true).build()) + .build(); + + try (var threadPool = createThreadPool()) { + final var client = getRestoreSnapshotRequestAssertingClient( + threadPool, + repository, + snapshotName, + indexName, + RESTORED_INDEX_PREFIX, + indexName, + new String[] { LifecycleSettings.LIFECYCLE_NAME } + ); + MountSnapshotStep step = new MountSnapshotStep( + new StepKey(TimeseriesLifecycleType.FROZEN_PHASE, randomAlphaOfLength(10), randomAlphaOfLength(10)), + randomStepKey(), + client, + RESTORED_INDEX_PREFIX, + randomStorageType(), + randomTotalShardsPerNode(false) ); performActionAndWait(step, indexMetadata, clusterState, null); } @@ -401,4 +467,10 @@ protected void } }; } + + private Integer randomTotalShardsPerNode(boolean nullable) { + Integer randomInt = randomIntBetween(1, 100); + Integer randomIntNullable = (randomBoolean() ? null : randomInt); + return nullable ? randomIntNullable : randomInt; + } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotActionTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotActionTests.java index 193d9abeec91d..ca219fdde3d57 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotActionTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotActionTests.java @@ -16,6 +16,7 @@ import java.util.List; import static org.elasticsearch.xpack.core.ilm.SearchableSnapshotAction.NAME; +import static org.elasticsearch.xpack.core.ilm.SearchableSnapshotAction.TOTAL_SHARDS_PER_NODE; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; @@ -97,6 +98,16 @@ public void testPrefixAndStorageTypeDefaults() { ); } + public void testCreateWithInvalidTotalShardsPerNode() { + int invalidTotalShardsPerNode = randomIntBetween(-100, 0); + + IllegalArgumentException exception = expectThrows( + IllegalArgumentException.class, + () -> new SearchableSnapshotAction("test", true, invalidTotalShardsPerNode) + ); + assertEquals("[" + TOTAL_SHARDS_PER_NODE.getPreferredName() + "] must be >= 1", exception.getMessage()); + } + private List expectedStepKeysWithForceMerge(String phase) { return List.of( new StepKey(phase, NAME, SearchableSnapshotAction.CONDITIONAL_SKIP_ACTION_STEP), @@ -160,14 +171,23 @@ protected Writeable.Reader instanceReader() { @Override protected SearchableSnapshotAction mutateInstance(SearchableSnapshotAction instance) { - return switch (randomIntBetween(0, 1)) { + return switch (randomIntBetween(0, 2)) { case 0 -> new SearchableSnapshotAction(randomAlphaOfLengthBetween(5, 10), instance.isForceMergeIndex()); case 1 -> new SearchableSnapshotAction(instance.getSnapshotRepository(), instance.isForceMergeIndex() == false); + case 2 -> new SearchableSnapshotAction( + instance.getSnapshotRepository(), + instance.isForceMergeIndex(), + instance.getTotalShardsPerNode() == null ? 1 : instance.getTotalShardsPerNode() + randomIntBetween(1, 100) + ); default -> throw new IllegalArgumentException("Invalid mutation branch"); }; } static SearchableSnapshotAction randomInstance() { - return new SearchableSnapshotAction(randomAlphaOfLengthBetween(5, 10), randomBoolean()); + return new SearchableSnapshotAction( + randomAlphaOfLengthBetween(5, 10), + randomBoolean(), + (randomBoolean() ? null : randomIntBetween(1, 100)) + ); } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsageTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsageTests.java new file mode 100644 index 0000000000000..87d658c6f983c --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsageTests.java @@ -0,0 +1,75 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.core.ml; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.core.Tuple; + +import java.io.IOException; +import java.util.Collections; + +public class MachineLearningFeatureSetUsageTests extends AbstractBWCWireSerializationTestCase { + @Override + protected Writeable.Reader instanceReader() { + return MachineLearningFeatureSetUsage::new; + } + + @Override + protected MachineLearningFeatureSetUsage createTestInstance() { + boolean enabled = randomBoolean(); + + if (enabled == false) { + return new MachineLearningFeatureSetUsage( + randomBoolean(), + enabled, + Collections.emptyMap(), + Collections.emptyMap(), + Collections.emptyMap(), + Collections.emptyMap(), + Collections.emptyMap(), + 0 + ); + } else { + return new MachineLearningFeatureSetUsage( + randomBoolean(), + enabled, + randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))), + randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))), + randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))), + randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))), + randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))), + randomIntBetween(1, 10) + ); + } + } + + @Override + protected MachineLearningFeatureSetUsage mutateInstance(MachineLearningFeatureSetUsage instance) throws IOException { + return null; + } + + @Override + protected MachineLearningFeatureSetUsage mutateInstanceForVersion(MachineLearningFeatureSetUsage instance, TransportVersion version) { + if (version.before(TransportVersions.ML_TELEMETRY_MEMORY_ADDED)) { + return new MachineLearningFeatureSetUsage( + instance.available(), + instance.enabled(), + instance.getJobsUsage(), + instance.getDatafeedsUsage(), + instance.getAnalyticsUsage(), + instance.getInferenceUsage(), + Collections.emptyMap(), + instance.getNodeCount() + ); + } + + return instance; + } +} diff --git a/x-pack/plugin/esql/compute/build.gradle b/x-pack/plugin/esql/compute/build.gradle index 81d1a6f5360ca..49e819b7cdc88 100644 --- a/x-pack/plugin/esql/compute/build.gradle +++ b/x-pack/plugin/esql/compute/build.gradle @@ -635,6 +635,11 @@ tasks.named('stringTemplates').configure { it.inputFile = topAggregatorInputFile it.outputFile = "org/elasticsearch/compute/aggregation/TopBooleanAggregator.java" } + template { + it.properties = bytesRefProperties + it.inputFile = topAggregatorInputFile + it.outputFile = "org/elasticsearch/compute/aggregation/TopBytesRefAggregator.java" + } template { it.properties = ipProperties it.inputFile = topAggregatorInputFile diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBytesRefAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBytesRefAggregator.java new file mode 100644 index 0000000000000..c9b0e679b3e64 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBytesRefAggregator.java @@ -0,0 +1,146 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.aggregation; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.compute.ann.Aggregator; +import org.elasticsearch.compute.ann.GroupingAggregator; +import org.elasticsearch.compute.ann.IntermediateState; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.data.sort.BytesRefBucketedSort; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.search.sort.SortOrder; + +/** + * Aggregates the top N field values for BytesRef. + *

+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file. + *

+ */ +@Aggregator({ @IntermediateState(name = "top", type = "BYTES_REF_BLOCK") }) +@GroupingAggregator +class TopBytesRefAggregator { + public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) { + return new SingleState(bigArrays, limit, ascending); + } + + public static void combine(SingleState state, BytesRef v) { + state.add(v); + } + + public static void combineIntermediate(SingleState state, BytesRefBlock values) { + int start = values.getFirstValueIndex(0); + int end = start + values.getValueCount(0); + var scratch = new BytesRef(); + for (int i = start; i < end; i++) { + combine(state, values.getBytesRef(i, scratch)); + } + } + + public static Block evaluateFinal(SingleState state, DriverContext driverContext) { + return state.toBlock(driverContext.blockFactory()); + } + + public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) { + return new GroupingState(bigArrays, limit, ascending); + } + + public static void combine(GroupingState state, int groupId, BytesRef v) { + state.add(groupId, v); + } + + public static void combineIntermediate(GroupingState state, int groupId, BytesRefBlock values, int valuesPosition) { + int start = values.getFirstValueIndex(valuesPosition); + int end = start + values.getValueCount(valuesPosition); + var scratch = new BytesRef(); + for (int i = start; i < end; i++) { + combine(state, groupId, values.getBytesRef(i, scratch)); + } + } + + public static void combineStates(GroupingState current, int groupId, GroupingState state, int statePosition) { + current.merge(groupId, state, statePosition); + } + + public static Block evaluateFinal(GroupingState state, IntVector selected, DriverContext driverContext) { + return state.toBlock(driverContext.blockFactory(), selected); + } + + public static class GroupingState implements Releasable { + private final BytesRefBucketedSort sort; + + private GroupingState(BigArrays bigArrays, int limit, boolean ascending) { + // TODO pass the breaker in from the DriverContext + CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST); + this.sort = new BytesRefBucketedSort(breaker, "top", bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit); + } + + public void add(int groupId, BytesRef value) { + sort.collect(value, groupId); + } + + public void merge(int groupId, GroupingState other, int otherGroupId) { + sort.merge(groupId, other.sort, otherGroupId); + } + + void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) { + blocks[offset] = toBlock(driverContext.blockFactory(), selected); + } + + Block toBlock(BlockFactory blockFactory, IntVector selected) { + return sort.toBlock(blockFactory, selected); + } + + void enableGroupIdTracking(SeenGroupIds seen) { + // we figure out seen values from nulls on the values block + } + + @Override + public void close() { + Releasables.closeExpectNoException(sort); + } + } + + public static class SingleState implements Releasable { + private final GroupingState internalState; + + private SingleState(BigArrays bigArrays, int limit, boolean ascending) { + this.internalState = new GroupingState(bigArrays, limit, ascending); + } + + public void add(BytesRef value) { + internalState.add(0, value); + } + + public void merge(GroupingState other) { + internalState.merge(0, other, 0); + } + + void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) { + blocks[offset] = toBlock(driverContext.blockFactory()); + } + + Block toBlock(BlockFactory blockFactory) { + try (var intValues = blockFactory.newConstantIntVector(0, 1)) { + return internalState.toBlock(blockFactory, intValues); + } + } + + @Override + public void close() { + Releasables.closeExpectNoException(internalState); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunction.java b/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunction.java new file mode 100644 index 0000000000000..17b3d84ab0028 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunction.java @@ -0,0 +1,174 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.compute.aggregation; + +import java.lang.Integer; +import java.lang.Override; +import java.lang.String; +import java.lang.StringBuilder; +import java.util.List; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanVector; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; + +/** + * {@link AggregatorFunction} implementation for {@link TopBytesRefAggregator}. + * This class is generated. Do not edit it. + */ +public final class TopBytesRefAggregatorFunction implements AggregatorFunction { + private static final List INTERMEDIATE_STATE_DESC = List.of( + new IntermediateStateDesc("top", ElementType.BYTES_REF) ); + + private final DriverContext driverContext; + + private final TopBytesRefAggregator.SingleState state; + + private final List channels; + + private final int limit; + + private final boolean ascending; + + public TopBytesRefAggregatorFunction(DriverContext driverContext, List channels, + TopBytesRefAggregator.SingleState state, int limit, boolean ascending) { + this.driverContext = driverContext; + this.channels = channels; + this.state = state; + this.limit = limit; + this.ascending = ascending; + } + + public static TopBytesRefAggregatorFunction create(DriverContext driverContext, + List channels, int limit, boolean ascending) { + return new TopBytesRefAggregatorFunction(driverContext, channels, TopBytesRefAggregator.initSingle(driverContext.bigArrays(), limit, ascending), limit, ascending); + } + + public static List intermediateStateDesc() { + return INTERMEDIATE_STATE_DESC; + } + + @Override + public int intermediateBlockCount() { + return INTERMEDIATE_STATE_DESC.size(); + } + + @Override + public void addRawInput(Page page, BooleanVector mask) { + if (mask.isConstant()) { + if (mask.getBoolean(0) == false) { + // Entire page masked away + return; + } + // No masking + BytesRefBlock block = page.getBlock(channels.get(0)); + BytesRefVector vector = block.asVector(); + if (vector != null) { + addRawVector(vector); + } else { + addRawBlock(block); + } + return; + } + // Some positions masked away, others kept + BytesRefBlock block = page.getBlock(channels.get(0)); + BytesRefVector vector = block.asVector(); + if (vector != null) { + addRawVector(vector, mask); + } else { + addRawBlock(block, mask); + } + } + + private void addRawVector(BytesRefVector vector) { + BytesRef scratch = new BytesRef(); + for (int i = 0; i < vector.getPositionCount(); i++) { + TopBytesRefAggregator.combine(state, vector.getBytesRef(i, scratch)); + } + } + + private void addRawVector(BytesRefVector vector, BooleanVector mask) { + BytesRef scratch = new BytesRef(); + for (int i = 0; i < vector.getPositionCount(); i++) { + if (mask.getBoolean(i) == false) { + continue; + } + TopBytesRefAggregator.combine(state, vector.getBytesRef(i, scratch)); + } + } + + private void addRawBlock(BytesRefBlock block) { + BytesRef scratch = new BytesRef(); + for (int p = 0; p < block.getPositionCount(); p++) { + if (block.isNull(p)) { + continue; + } + int start = block.getFirstValueIndex(p); + int end = start + block.getValueCount(p); + for (int i = start; i < end; i++) { + TopBytesRefAggregator.combine(state, block.getBytesRef(i, scratch)); + } + } + } + + private void addRawBlock(BytesRefBlock block, BooleanVector mask) { + BytesRef scratch = new BytesRef(); + for (int p = 0; p < block.getPositionCount(); p++) { + if (mask.getBoolean(p) == false) { + continue; + } + if (block.isNull(p)) { + continue; + } + int start = block.getFirstValueIndex(p); + int end = start + block.getValueCount(p); + for (int i = start; i < end; i++) { + TopBytesRefAggregator.combine(state, block.getBytesRef(i, scratch)); + } + } + } + + @Override + public void addIntermediateInput(Page page) { + assert channels.size() == intermediateBlockCount(); + assert page.getBlockCount() >= channels.get(0) + intermediateStateDesc().size(); + Block topUncast = page.getBlock(channels.get(0)); + if (topUncast.areAllValuesNull()) { + return; + } + BytesRefBlock top = (BytesRefBlock) topUncast; + assert top.getPositionCount() == 1; + BytesRef scratch = new BytesRef(); + TopBytesRefAggregator.combineIntermediate(state, top); + } + + @Override + public void evaluateIntermediate(Block[] blocks, int offset, DriverContext driverContext) { + state.toIntermediate(blocks, offset, driverContext); + } + + @Override + public void evaluateFinal(Block[] blocks, int offset, DriverContext driverContext) { + blocks[offset] = TopBytesRefAggregator.evaluateFinal(state, driverContext); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getClass().getSimpleName()).append("["); + sb.append("channels=").append(channels); + sb.append("]"); + return sb.toString(); + } + + @Override + public void close() { + state.close(); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunctionSupplier.java b/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunctionSupplier.java new file mode 100644 index 0000000000000..8c77d2116bf69 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunctionSupplier.java @@ -0,0 +1,45 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.compute.aggregation; + +import java.lang.Integer; +import java.lang.Override; +import java.lang.String; +import java.util.List; +import org.elasticsearch.compute.operator.DriverContext; + +/** + * {@link AggregatorFunctionSupplier} implementation for {@link TopBytesRefAggregator}. + * This class is generated. Do not edit it. + */ +public final class TopBytesRefAggregatorFunctionSupplier implements AggregatorFunctionSupplier { + private final List channels; + + private final int limit; + + private final boolean ascending; + + public TopBytesRefAggregatorFunctionSupplier(List channels, int limit, + boolean ascending) { + this.channels = channels; + this.limit = limit; + this.ascending = ascending; + } + + @Override + public TopBytesRefAggregatorFunction aggregator(DriverContext driverContext) { + return TopBytesRefAggregatorFunction.create(driverContext, channels, limit, ascending); + } + + @Override + public TopBytesRefGroupingAggregatorFunction groupingAggregator(DriverContext driverContext) { + return TopBytesRefGroupingAggregatorFunction.create(channels, driverContext, limit, ascending); + } + + @Override + public String describe() { + return "top of bytes"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefGroupingAggregatorFunction.java b/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefGroupingAggregatorFunction.java new file mode 100644 index 0000000000000..aa2d6094c8c3f --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefGroupingAggregatorFunction.java @@ -0,0 +1,221 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.compute.aggregation; + +import java.lang.Integer; +import java.lang.Override; +import java.lang.String; +import java.lang.StringBuilder; +import java.util.List; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; + +/** + * {@link GroupingAggregatorFunction} implementation for {@link TopBytesRefAggregator}. + * This class is generated. Do not edit it. + */ +public final class TopBytesRefGroupingAggregatorFunction implements GroupingAggregatorFunction { + private static final List INTERMEDIATE_STATE_DESC = List.of( + new IntermediateStateDesc("top", ElementType.BYTES_REF) ); + + private final TopBytesRefAggregator.GroupingState state; + + private final List channels; + + private final DriverContext driverContext; + + private final int limit; + + private final boolean ascending; + + public TopBytesRefGroupingAggregatorFunction(List channels, + TopBytesRefAggregator.GroupingState state, DriverContext driverContext, int limit, + boolean ascending) { + this.channels = channels; + this.state = state; + this.driverContext = driverContext; + this.limit = limit; + this.ascending = ascending; + } + + public static TopBytesRefGroupingAggregatorFunction create(List channels, + DriverContext driverContext, int limit, boolean ascending) { + return new TopBytesRefGroupingAggregatorFunction(channels, TopBytesRefAggregator.initGrouping(driverContext.bigArrays(), limit, ascending), driverContext, limit, ascending); + } + + public static List intermediateStateDesc() { + return INTERMEDIATE_STATE_DESC; + } + + @Override + public int intermediateBlockCount() { + return INTERMEDIATE_STATE_DESC.size(); + } + + @Override + public GroupingAggregatorFunction.AddInput prepareProcessPage(SeenGroupIds seenGroupIds, + Page page) { + BytesRefBlock valuesBlock = page.getBlock(channels.get(0)); + BytesRefVector valuesVector = valuesBlock.asVector(); + if (valuesVector == null) { + if (valuesBlock.mayHaveNulls()) { + state.enableGroupIdTracking(seenGroupIds); + } + return new GroupingAggregatorFunction.AddInput() { + @Override + public void add(int positionOffset, IntBlock groupIds) { + addRawInput(positionOffset, groupIds, valuesBlock); + } + + @Override + public void add(int positionOffset, IntVector groupIds) { + addRawInput(positionOffset, groupIds, valuesBlock); + } + + @Override + public void close() { + } + }; + } + return new GroupingAggregatorFunction.AddInput() { + @Override + public void add(int positionOffset, IntBlock groupIds) { + addRawInput(positionOffset, groupIds, valuesVector); + } + + @Override + public void add(int positionOffset, IntVector groupIds) { + addRawInput(positionOffset, groupIds, valuesVector); + } + + @Override + public void close() { + } + }; + } + + private void addRawInput(int positionOffset, IntVector groups, BytesRefBlock values) { + BytesRef scratch = new BytesRef(); + for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) { + int groupId = groups.getInt(groupPosition); + if (values.isNull(groupPosition + positionOffset)) { + continue; + } + int valuesStart = values.getFirstValueIndex(groupPosition + positionOffset); + int valuesEnd = valuesStart + values.getValueCount(groupPosition + positionOffset); + for (int v = valuesStart; v < valuesEnd; v++) { + TopBytesRefAggregator.combine(state, groupId, values.getBytesRef(v, scratch)); + } + } + } + + private void addRawInput(int positionOffset, IntVector groups, BytesRefVector values) { + BytesRef scratch = new BytesRef(); + for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) { + int groupId = groups.getInt(groupPosition); + TopBytesRefAggregator.combine(state, groupId, values.getBytesRef(groupPosition + positionOffset, scratch)); + } + } + + private void addRawInput(int positionOffset, IntBlock groups, BytesRefBlock values) { + BytesRef scratch = new BytesRef(); + for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) { + if (groups.isNull(groupPosition)) { + continue; + } + int groupStart = groups.getFirstValueIndex(groupPosition); + int groupEnd = groupStart + groups.getValueCount(groupPosition); + for (int g = groupStart; g < groupEnd; g++) { + int groupId = groups.getInt(g); + if (values.isNull(groupPosition + positionOffset)) { + continue; + } + int valuesStart = values.getFirstValueIndex(groupPosition + positionOffset); + int valuesEnd = valuesStart + values.getValueCount(groupPosition + positionOffset); + for (int v = valuesStart; v < valuesEnd; v++) { + TopBytesRefAggregator.combine(state, groupId, values.getBytesRef(v, scratch)); + } + } + } + } + + private void addRawInput(int positionOffset, IntBlock groups, BytesRefVector values) { + BytesRef scratch = new BytesRef(); + for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) { + if (groups.isNull(groupPosition)) { + continue; + } + int groupStart = groups.getFirstValueIndex(groupPosition); + int groupEnd = groupStart + groups.getValueCount(groupPosition); + for (int g = groupStart; g < groupEnd; g++) { + int groupId = groups.getInt(g); + TopBytesRefAggregator.combine(state, groupId, values.getBytesRef(groupPosition + positionOffset, scratch)); + } + } + } + + @Override + public void selectedMayContainUnseenGroups(SeenGroupIds seenGroupIds) { + state.enableGroupIdTracking(seenGroupIds); + } + + @Override + public void addIntermediateInput(int positionOffset, IntVector groups, Page page) { + state.enableGroupIdTracking(new SeenGroupIds.Empty()); + assert channels.size() == intermediateBlockCount(); + Block topUncast = page.getBlock(channels.get(0)); + if (topUncast.areAllValuesNull()) { + return; + } + BytesRefBlock top = (BytesRefBlock) topUncast; + BytesRef scratch = new BytesRef(); + for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) { + int groupId = groups.getInt(groupPosition); + TopBytesRefAggregator.combineIntermediate(state, groupId, top, groupPosition + positionOffset); + } + } + + @Override + public void addIntermediateRowInput(int groupId, GroupingAggregatorFunction input, int position) { + if (input.getClass() != getClass()) { + throw new IllegalArgumentException("expected " + getClass() + "; got " + input.getClass()); + } + TopBytesRefAggregator.GroupingState inState = ((TopBytesRefGroupingAggregatorFunction) input).state; + state.enableGroupIdTracking(new SeenGroupIds.Empty()); + TopBytesRefAggregator.combineStates(state, groupId, inState, position); + } + + @Override + public void evaluateIntermediate(Block[] blocks, int offset, IntVector selected) { + state.toIntermediate(blocks, offset, selected, driverContext); + } + + @Override + public void evaluateFinal(Block[] blocks, int offset, IntVector selected, + DriverContext driverContext) { + blocks[offset] = TopBytesRefAggregator.evaluateFinal(state, selected, driverContext); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getClass().getSimpleName()).append("["); + sb.append("channels=").append(channels); + sb.append("]"); + return sb.toString(); + } + + @Override + public void close() { + state.close(); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-TopAggregator.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-TopAggregator.java.st index b97d26ee6147d..18d573eea4a4c 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-TopAggregator.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-TopAggregator.java.st @@ -7,9 +7,12 @@ package org.elasticsearch.compute.aggregation; -$if(Ip)$ +$if(BytesRef || Ip)$ import org.apache.lucene.util.BytesRef; $endif$ +$if(BytesRef)$ +import org.elasticsearch.common.breaker.CircuitBreaker; +$endif$ import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.compute.ann.Aggregator; import org.elasticsearch.compute.ann.GroupingAggregator; @@ -49,7 +52,7 @@ class Top$Name$Aggregator { public static void combineIntermediate(SingleState state, $Type$Block values) { int start = values.getFirstValueIndex(0); int end = start + values.getValueCount(0); -$if(Ip)$ +$if(BytesRef || Ip)$ var scratch = new BytesRef(); for (int i = start; i < end; i++) { combine(state, values.get$Type$(i, scratch)); @@ -76,7 +79,7 @@ $endif$ public static void combineIntermediate(GroupingState state, int groupId, $Type$Block values, int valuesPosition) { int start = values.getFirstValueIndex(valuesPosition); int end = start + values.getValueCount(valuesPosition); -$if(Ip)$ +$if(BytesRef || Ip)$ var scratch = new BytesRef(); for (int i = start; i < end; i++) { combine(state, groupId, values.get$Type$(i, scratch)); @@ -100,7 +103,13 @@ $endif$ private final $Name$BucketedSort sort; private GroupingState(BigArrays bigArrays, int limit, boolean ascending) { +$if(BytesRef)$ + // TODO pass the breaker in from the DriverContext + CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST); + this.sort = new BytesRefBucketedSort(breaker, "top", bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit); +$else$ this.sort = new $Name$BucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit); +$endif$ } public void add(int groupId, $type$ value) { diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/BucketedSortCommon.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/BucketedSortCommon.java new file mode 100644 index 0000000000000..58306f2140a82 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/BucketedSortCommon.java @@ -0,0 +1,68 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.data.sort; + +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.BitArray; +import org.elasticsearch.core.Releasable; +import org.elasticsearch.search.sort.SortOrder; + +/** + * Components common to BucketedSort implementations. + */ +class BucketedSortCommon implements Releasable { + final BigArrays bigArrays; + final SortOrder order; + final int bucketSize; + + /** + * {@code true} if the bucket is in heap mode, {@code false} if + * it is still gathering. + */ + private final BitArray heapMode; + + BucketedSortCommon(BigArrays bigArrays, SortOrder order, int bucketSize) { + this.bigArrays = bigArrays; + this.order = order; + this.bucketSize = bucketSize; + this.heapMode = new BitArray(0, bigArrays); + } + + /** + * The first index in a bucket. Note that this might not be used. + * See {@link } + */ + long rootIndex(int bucket) { + return (long) bucket * bucketSize; + } + + /** + * The last index in a bucket. + */ + long endIndex(long rootIndex) { + return rootIndex + bucketSize; + } + + boolean inHeapMode(int bucket) { + return heapMode.get(bucket); + } + + void enableHeapMode(int bucket) { + heapMode.set(bucket); + } + + void assertValidNextOffset(int next) { + assert 0 <= next && next < bucketSize + : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]"; + } + + @Override + public void close() { + heapMode.close(); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/BytesRefBucketedSort.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/BytesRefBucketedSort.java new file mode 100644 index 0000000000000..9198de53b1e04 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/BytesRefBucketedSort.java @@ -0,0 +1,386 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.data.sort; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.ByteUtils; +import org.elasticsearch.common.util.ObjectArray; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; +import org.elasticsearch.core.Assertions; +import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.search.sort.BucketedSort; +import org.elasticsearch.search.sort.SortOrder; + +import java.util.Arrays; +import java.util.stream.IntStream; +import java.util.stream.LongStream; + +/** + * Aggregates the top N variable length {@link BytesRef} values per bucket. + * See {@link BucketedSort} for more information. + */ +public class BytesRefBucketedSort implements Releasable { + private final BucketedSortCommon common; + private final CircuitBreaker breaker; + private final String label; + + /** + * An array containing all the values on all buckets. The structure is as follows: + *

+ * For each bucket, there are {@link BucketedSortCommon#bucketSize} elements, based + * on the bucket id (0, 1, 2...). Then, for each bucket, it can be in 2 states: + *

+ *
    + *
  • + * Gather mode: All buckets start in gather mode, and remain here while they have + * less than bucketSize elements. In gather mode, the elements are stored in the + * array from the highest index to the lowest index. The lowest index contains + * the offset to the next slot to be filled. + *

    + * This allows us to insert elements in O(1) time. + *

    + *

    + * When the bucketSize-th element is collected, the bucket transitions to heap + * mode, by heapifying its contents. + *

    + *
  • + *
  • + * Heap mode: The bucket slots are organized as a min heap structure. + *

    + * The root of the heap is the minimum value in the bucket, + * which allows us to quickly discard new values that are not in the top N. + *

    + *
  • + *
+ */ + private ObjectArray values; + + public BytesRefBucketedSort(CircuitBreaker breaker, String label, BigArrays bigArrays, SortOrder order, int bucketSize) { + this.breaker = breaker; + this.label = label; + common = new BucketedSortCommon(bigArrays, order, bucketSize); + boolean success = false; + try { + values = bigArrays.newObjectArray(0); + success = true; + } finally { + if (success == false) { + close(); + } + } + } + + private void checkInvariant(int bucket) { + if (Assertions.ENABLED == false) { + return; + } + long rootIndex = common.rootIndex(bucket); + long requiredSize = common.endIndex(rootIndex); + if (values.size() < requiredSize) { + throw new AssertionError("values too short " + values.size() + " < " + requiredSize); + } + if (values.get(rootIndex) == null) { + throw new AssertionError("new gather offset can't be null"); + } + if (common.inHeapMode(bucket) == false) { + common.assertValidNextOffset(getNextGatherOffset(rootIndex)); + } else { + for (long l = rootIndex; l < common.endIndex(rootIndex); l++) { + if (values.get(rootIndex) == null) { + throw new AssertionError("values missing in heap mode"); + } + } + } + } + + /** + * Collects a {@code value} into a {@code bucket}. + *

+ * It may or may not be inserted in the heap, depending on if it is better than the current root. + *

+ */ + public void collect(BytesRef value, int bucket) { + long rootIndex = common.rootIndex(bucket); + if (common.inHeapMode(bucket)) { + if (betterThan(value, values.get(rootIndex).bytesRefView())) { + clearedBytesAt(rootIndex).append(value); + downHeap(rootIndex, 0); + } + checkInvariant(bucket); + return; + } + // Gathering mode + long requiredSize = common.endIndex(rootIndex); + if (values.size() < requiredSize) { + grow(requiredSize); + } + int next = getNextGatherOffset(rootIndex); + common.assertValidNextOffset(next); + long index = next + rootIndex; + clearedBytesAt(index).append(value); + if (next == 0) { + common.enableHeapMode(bucket); + heapify(rootIndex); + } else { + ByteUtils.writeIntLE(next - 1, values.get(rootIndex).bytes(), 0); + } + checkInvariant(bucket); + } + + /** + * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}. + */ + public void merge(int bucket, BytesRefBucketedSort other, int otherBucket) { + long otherRootIndex = other.common.rootIndex(otherBucket); + if (otherRootIndex >= other.values.size()) { + // The value was never collected. + return; + } + other.checkInvariant(bucket); + long otherStart = other.startIndex(otherBucket, otherRootIndex); + long otherEnd = other.common.endIndex(otherRootIndex); + // TODO: This can be improved for heapified buckets by making use of the heap structures + for (long i = otherStart; i < otherEnd; i++) { + collect(other.values.get(i).bytesRefView(), bucket); + } + } + + /** + * Creates a block with the values from the {@code selected} groups. + */ + public Block toBlock(BlockFactory blockFactory, IntVector selected) { + // Check if the selected groups are all empty, to avoid allocating extra memory + if (IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> { + long rootIndex = common.rootIndex(bucket); + if (rootIndex >= values.size()) { + // Never collected + return false; + } + long start = startIndex(bucket, rootIndex); + long end = common.endIndex(rootIndex); + long size = end - start; + return size > 0; + })) { + return blockFactory.newConstantNullBlock(selected.getPositionCount()); + } + + // Used to sort the values in the bucket. + BytesRef[] bucketValues = new BytesRef[common.bucketSize]; + + try (var builder = blockFactory.newBytesRefBlockBuilder(selected.getPositionCount())) { + for (int s = 0; s < selected.getPositionCount(); s++) { + int bucket = selected.getInt(s); + long rootIndex = common.rootIndex(bucket); + if (rootIndex >= values.size()) { + // Never collected + builder.appendNull(); + continue; + } + + long start = startIndex(bucket, rootIndex); + long end = common.endIndex(rootIndex); + long size = end - start; + + if (size == 0) { + builder.appendNull(); + continue; + } + + if (size == 1) { + try (BreakingBytesRefBuilder bytes = values.get(start)) { + builder.appendBytesRef(bytes.bytesRefView()); + } + values.set(start, null); + continue; + } + + for (int i = 0; i < size; i++) { + try (BreakingBytesRefBuilder bytes = values.get(start + i)) { + bucketValues[i] = bytes.bytesRefView(); + } + values.set(start + i, null); + } + + // TODO: Make use of heap structures to faster iterate in order instead of copying and sorting + Arrays.sort(bucketValues, 0, (int) size); + + builder.beginPositionEntry(); + if (common.order == SortOrder.ASC) { + for (int i = 0; i < size; i++) { + builder.appendBytesRef(bucketValues[i]); + } + } else { + for (int i = (int) size - 1; i >= 0; i--) { + builder.appendBytesRef(bucketValues[i]); + } + } + builder.endPositionEntry(); + } + return builder.build(); + } + } + + private long startIndex(int bucket, long rootIndex) { + if (common.inHeapMode(bucket)) { + return rootIndex; + } + return rootIndex + getNextGatherOffset(rootIndex) + 1; + } + + /** + * Get the next index that should be "gathered" for a bucket rooted + * at {@code rootIndex}. + *

+ * Using the first 4 bytes of the element to store the next gather offset. + *

+ */ + private int getNextGatherOffset(long rootIndex) { + BreakingBytesRefBuilder bytes = values.get(rootIndex); + assert bytes.length() == Integer.BYTES; + return ByteUtils.readIntLE(bytes.bytes(), 0); + } + + /** + * {@code true} if the entry at index {@code lhs} is "better" than + * the entry at {@code rhs}. "Better" in this means "lower" for + * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}. + */ + private boolean betterThan(BytesRef lhs, BytesRef rhs) { + return common.order.reverseMul() * lhs.compareTo(rhs) < 0; + } + + /** + * Swap the data at two indices. + */ + private void swap(long lhs, long rhs) { + BreakingBytesRefBuilder tmp = values.get(lhs); + values.set(lhs, values.get(rhs)); + values.set(rhs, tmp); + } + + /** + * Allocate storage for more buckets and store the "next gather offset" + * for those new buckets. + */ + private void grow(long requiredSize) { + long oldMax = values.size(); + values = common.bigArrays.grow(values, requiredSize); + // Set the next gather offsets for all newly allocated buckets. + fillGatherOffsets(oldMax - (oldMax % common.bucketSize)); + } + + /** + * Maintain the "next gather offsets" for newly allocated buckets. + */ + private void fillGatherOffsets(long startingAt) { + assert startingAt % common.bucketSize == 0; + int nextOffset = common.bucketSize - 1; + for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += common.bucketSize) { + BreakingBytesRefBuilder bytes = values.get(bucketRoot); + if (bytes != null) { + continue; + } + bytes = new BreakingBytesRefBuilder(breaker, label); + values.set(bucketRoot, bytes); + bytes.grow(Integer.BYTES); + bytes.setLength(Integer.BYTES); + ByteUtils.writeIntLE(nextOffset, bytes.bytes(), 0); + } + } + + /** + * Heapify a bucket whose entries are in random order. + *

+ * This works by validating the heap property on each node, iterating + * "upwards", pushing any out of order parents "down". Check out the + * wikipedia + * entry on binary heaps for more about this. + *

+ *

+ * While this *looks* like it could easily be {@code O(n * log n)}, it is + * a fairly well studied algorithm attributed to Floyd. There's + * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst + * case. + *

+ * + * @param rootIndex the index the start of the bucket + */ + private void heapify(long rootIndex) { + int maxParent = common.bucketSize / 2 - 1; + for (int parent = maxParent; parent >= 0; parent--) { + downHeap(rootIndex, parent); + } + } + + /** + * Correct the heap invariant of a parent and its children. This + * runs in {@code O(log n)} time. + * @param rootIndex index of the start of the bucket + * @param parent Index within the bucket of the parent to check. + * For example, 0 is the "root". + */ + private void downHeap(long rootIndex, int parent) { + while (true) { + long parentIndex = rootIndex + parent; + int worst = parent; + long worstIndex = parentIndex; + int leftChild = parent * 2 + 1; + long leftIndex = rootIndex + leftChild; + if (leftChild < common.bucketSize) { + if (betterThan(values.get(worstIndex).bytesRefView(), values.get(leftIndex).bytesRefView())) { + worst = leftChild; + worstIndex = leftIndex; + } + int rightChild = leftChild + 1; + long rightIndex = rootIndex + rightChild; + if (rightChild < common.bucketSize + && betterThan(values.get(worstIndex).bytesRefView(), values.get(rightIndex).bytesRefView())) { + + worst = rightChild; + worstIndex = rightIndex; + } + } + if (worst == parent) { + break; + } + swap(worstIndex, parentIndex); + parent = worst; + } + } + + private BreakingBytesRefBuilder clearedBytesAt(long index) { + BreakingBytesRefBuilder bytes = values.get(index); + if (bytes == null) { + bytes = new BreakingBytesRefBuilder(breaker, label); + values.set(index, bytes); + } else { + bytes.clear(); + } + return bytes; + } + + @Override + public final void close() { + Releasable allValues = values == null ? () -> {} : Releasables.wrap(LongStream.range(0, values.size()).mapToObj(i -> { + BreakingBytesRefBuilder bytes = values.get(i); + return bytes == null ? (Releasable) () -> {} : bytes; + }).toList().iterator()); + Releasables.close(allValues, values, common); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/IpBucketedSort.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/IpBucketedSort.java index 0fd38c18d7504..4eb31ea30db22 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/IpBucketedSort.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/IpBucketedSort.java @@ -9,7 +9,6 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.common.util.BitArray; import org.elasticsearch.common.util.ByteArray; import org.elasticsearch.common.util.ByteUtils; import org.elasticsearch.compute.data.Block; @@ -29,7 +28,7 @@ * See {@link BucketedSort} for more information. */ public class IpBucketedSort implements Releasable { - private static final int IP_LENGTH = 16; + private static final int IP_LENGTH = 16; // Bytes. It's ipv6. // BytesRefs used in internal methods private final BytesRef scratch1 = new BytesRef(); @@ -39,18 +38,11 @@ public class IpBucketedSort implements Releasable { */ private final byte[] scratchBytes = new byte[IP_LENGTH]; - private final BigArrays bigArrays; - private final SortOrder order; - private final int bucketSize; - /** - * {@code true} if the bucket is in heap mode, {@code false} if - * it is still gathering. - */ - private final BitArray heapMode; + private final BucketedSortCommon common; /** * An array containing all the values on all buckets. The structure is as follows: *

- * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...). + * For each bucket, there are {@link BucketedSortCommon#bucketSize} elements, based on the bucket id (0, 1, 2...). * Then, for each bucket, it can be in 2 states: *

*
    @@ -77,10 +69,7 @@ public class IpBucketedSort implements Releasable { private ByteArray values; public IpBucketedSort(BigArrays bigArrays, SortOrder order, int bucketSize) { - this.bigArrays = bigArrays; - this.order = order; - this.bucketSize = bucketSize; - heapMode = new BitArray(0, bigArrays); + this.common = new BucketedSortCommon(bigArrays, order, bucketSize); boolean success = false; try { @@ -101,8 +90,8 @@ public IpBucketedSort(BigArrays bigArrays, SortOrder order, int bucketSize) { */ public void collect(BytesRef value, int bucket) { assert value.length == IP_LENGTH; - long rootIndex = (long) bucket * bucketSize; - if (inHeapMode(bucket)) { + long rootIndex = common.rootIndex(bucket); + if (common.inHeapMode(bucket)) { if (betterThan(value, get(rootIndex, scratch1))) { set(rootIndex, value); downHeap(rootIndex, 0); @@ -110,49 +99,34 @@ public void collect(BytesRef value, int bucket) { return; } // Gathering mode - long requiredSize = (rootIndex + bucketSize) * IP_LENGTH; + long requiredSize = common.endIndex(rootIndex) * IP_LENGTH; if (values.size() < requiredSize) { grow(requiredSize); } int next = getNextGatherOffset(rootIndex); - assert 0 <= next && next < bucketSize - : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]"; + common.assertValidNextOffset(next); long index = next + rootIndex; set(index, value); if (next == 0) { - heapMode.set(bucket); + common.enableHeapMode(bucket); heapify(rootIndex); } else { setNextGatherOffset(rootIndex, next - 1); } } - /** - * The order of the sort. - */ - public SortOrder getOrder() { - return order; - } - - /** - * The number of values to store per bucket. - */ - public int getBucketSize() { - return bucketSize; - } - /** * Get the first and last indexes (inclusive, exclusive) of the values for a bucket. * Returns [0, 0] if the bucket has never been collected. */ private Tuple getBucketValuesIndexes(int bucket) { - long rootIndex = (long) bucket * bucketSize; + long rootIndex = common.rootIndex(bucket); if (rootIndex >= values.size() / IP_LENGTH) { // We've never seen this bucket. return Tuple.tuple(0L, 0L); } - long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1); - long end = rootIndex + bucketSize; + long start = startIndex(bucket, rootIndex); + long end = common.endIndex(rootIndex); return Tuple.tuple(start, end); } @@ -184,7 +158,7 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) { } // Used to sort the values in the bucket. - var bucketValues = new BytesRef[bucketSize]; + var bucketValues = new BytesRef[common.bucketSize]; try (var builder = blockFactory.newBytesRefBlockBuilder(selected.getPositionCount())) { for (int s = 0; s < selected.getPositionCount(); s++) { @@ -211,7 +185,7 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) { Arrays.sort(bucketValues, 0, (int) size); builder.beginPositionEntry(); - if (order == SortOrder.ASC) { + if (common.order == SortOrder.ASC) { for (int i = 0; i < size; i++) { builder.appendBytesRef(bucketValues[i]); } @@ -226,11 +200,11 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) { } } - /** - * Is this bucket a min heap {@code true} or in gathering mode {@code false}? - */ - private boolean inHeapMode(int bucket) { - return heapMode.get(bucket); + private long startIndex(int bucket, long rootIndex) { + if (common.inHeapMode(bucket)) { + return rootIndex; + } + return rootIndex + getNextGatherOffset(rootIndex) + 1; } /** @@ -267,7 +241,7 @@ private void setNextGatherOffset(long rootIndex, int offset) { * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}. */ private boolean betterThan(BytesRef lhs, BytesRef rhs) { - return getOrder().reverseMul() * lhs.compareTo(rhs) < 0; + return common.order.reverseMul() * lhs.compareTo(rhs) < 0; } /** @@ -296,17 +270,17 @@ private void swap(long lhs, long rhs) { */ private void grow(long minSize) { long oldMax = values.size() / IP_LENGTH; - values = bigArrays.grow(values, minSize); + values = common.bigArrays.grow(values, minSize); // Set the next gather offsets for all newly allocated buckets. - setNextGatherOffsets(oldMax - (oldMax % bucketSize)); + setNextGatherOffsets(oldMax - (oldMax % common.bucketSize)); } /** * Maintain the "next gather offsets" for newly allocated buckets. */ private void setNextGatherOffsets(long startingAt) { - int nextOffset = bucketSize - 1; - for (long bucketRoot = startingAt; bucketRoot < values.size() / IP_LENGTH; bucketRoot += bucketSize) { + int nextOffset = common.bucketSize - 1; + for (long bucketRoot = startingAt; bucketRoot < values.size() / IP_LENGTH; bucketRoot += common.bucketSize) { setNextGatherOffset(bucketRoot, nextOffset); } } @@ -334,7 +308,7 @@ private void setNextGatherOffsets(long startingAt) { * @param rootIndex the index the start of the bucket */ private void heapify(long rootIndex) { - int maxParent = bucketSize / 2 - 1; + int maxParent = common.bucketSize / 2 - 1; for (int parent = maxParent; parent >= 0; parent--) { downHeap(rootIndex, parent); } @@ -354,14 +328,14 @@ private void downHeap(long rootIndex, int parent) { long worstIndex = parentIndex; int leftChild = parent * 2 + 1; long leftIndex = rootIndex + leftChild; - if (leftChild < bucketSize) { + if (leftChild < common.bucketSize) { if (betterThan(get(worstIndex, scratch1), get(leftIndex, scratch2))) { worst = leftChild; worstIndex = leftIndex; } int rightChild = leftChild + 1; long rightIndex = rootIndex + rightChild; - if (rightChild < bucketSize && betterThan(get(worstIndex, scratch1), get(rightIndex, scratch2))) { + if (rightChild < common.bucketSize && betterThan(get(worstIndex, scratch1), get(rightIndex, scratch2))) { worst = rightChild; worstIndex = rightIndex; } @@ -400,6 +374,6 @@ private void set(long index, BytesRef value) { @Override public final void close() { - Releasables.close(values, heapMode); + Releasables.close(values, common); } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefAggregatorFunctionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefAggregatorFunctionTests.java new file mode 100644 index 0000000000000..2815dd70e8124 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefAggregatorFunctionTests.java @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.aggregation; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BlockUtils; +import org.elasticsearch.compute.operator.SequenceBytesRefBlockSourceOperator; +import org.elasticsearch.compute.operator.SourceOperator; + +import java.util.List; +import java.util.stream.IntStream; + +import static org.hamcrest.Matchers.contains; + +abstract class AbstractTopBytesRefAggregatorFunctionTests extends AggregatorFunctionTestCase { + static final int LIMIT = 100; + + @Override + protected final SourceOperator simpleInput(BlockFactory blockFactory, int size) { + return new SequenceBytesRefBlockSourceOperator(blockFactory, IntStream.range(0, size).mapToObj(l -> randomValue())); + } + + protected abstract BytesRef randomValue(); + + @Override + public final void assertSimpleOutput(List input, Block result) { + Object[] values = input.stream().flatMap(AggregatorFunctionTestCase::allBytesRefs).sorted().limit(LIMIT).toArray(Object[]::new); + assertThat((List) BlockUtils.toJavaObject(result, 0), contains(values)); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefGroupingAggregatorFunctionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefGroupingAggregatorFunctionTests.java new file mode 100644 index 0000000000000..45c8a23dfc1c0 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefGroupingAggregatorFunctionTests.java @@ -0,0 +1,49 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.aggregation; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BlockUtils; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.LongBytesRefTupleBlockSourceOperator; +import org.elasticsearch.compute.operator.SourceOperator; +import org.elasticsearch.core.Tuple; + +import java.util.List; +import java.util.stream.IntStream; + +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; + +public abstract class AbstractTopBytesRefGroupingAggregatorFunctionTests extends GroupingAggregatorFunctionTestCase { + static final int LIMIT = 100; + + @Override + protected final SourceOperator simpleInput(BlockFactory blockFactory, int size) { + return new LongBytesRefTupleBlockSourceOperator( + blockFactory, + IntStream.range(0, size).mapToObj(l -> Tuple.tuple(randomLongBetween(0, 4), randomValue())) + ); + } + + protected abstract BytesRef randomValue(); + + @Override + protected final void assertSimpleGroup(List input, Block result, int position, Long group) { + Object[] values = input.stream().flatMap(b -> allBytesRefs(b, group)).sorted().limit(LIMIT).toArray(Object[]::new); + if (values.length == 0) { + assertThat(result.isNull(position), equalTo(true)); + } else if (values.length == 1) { + assertThat(BlockUtils.toJavaObject(result, position), equalTo(values[0])); + } else { + assertThat((List) BlockUtils.toJavaObject(result, position), contains(values)); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunctionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunctionTests.java new file mode 100644 index 0000000000000..732229c98f9c7 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunctionTests.java @@ -0,0 +1,29 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.aggregation; + +import org.apache.lucene.util.BytesRef; + +import java.util.List; + +public class TopBytesRefAggregatorFunctionTests extends AbstractTopBytesRefAggregatorFunctionTests { + @Override + protected BytesRef randomValue() { + return new BytesRef(randomAlphaOfLength(10)); + } + + @Override + protected AggregatorFunctionSupplier aggregatorFunction(List inputChannels) { + return new TopBytesRefAggregatorFunctionSupplier(inputChannels, LIMIT, true); + } + + @Override + protected String expectedDescriptionOfAggregator() { + return "top of bytes"; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopBytesRefGroupingAggregatorFunctionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopBytesRefGroupingAggregatorFunctionTests.java new file mode 100644 index 0000000000000..4932e1abef46d --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopBytesRefGroupingAggregatorFunctionTests.java @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.aggregation; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.xpack.esql.core.type.DataType; + +import java.util.List; + +public class TopBytesRefGroupingAggregatorFunctionTests extends AbstractTopBytesRefGroupingAggregatorFunctionTests { + @Override + protected BytesRef randomValue() { + return new BytesRef(randomAlphaOfLength(6)); + } + + @Override + protected final AggregatorFunctionSupplier aggregatorFunction(List inputChannels) { + return new TopBytesRefAggregatorFunctionSupplier(inputChannels, LIMIT, true); + } + + @Override + protected DataType acceptedDataType() { + return DataType.KEYWORD; + } + + @Override + protected String expectedDescriptionOfAggregator() { + return "top of bytes"; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpAggregatorFunctionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpAggregatorFunctionTests.java index 1594f66ed9fe2..840e4cf9af961 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpAggregatorFunctionTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpAggregatorFunctionTests.java @@ -9,26 +9,13 @@ import org.apache.lucene.document.InetAddressPoint; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.compute.data.Block; -import org.elasticsearch.compute.data.BlockFactory; -import org.elasticsearch.compute.data.BlockUtils; -import org.elasticsearch.compute.operator.SequenceBytesRefBlockSourceOperator; -import org.elasticsearch.compute.operator.SourceOperator; import java.util.List; -import java.util.stream.IntStream; - -import static org.hamcrest.Matchers.contains; - -public class TopIpAggregatorFunctionTests extends AggregatorFunctionTestCase { - private static final int LIMIT = 100; +public class TopIpAggregatorFunctionTests extends AbstractTopBytesRefAggregatorFunctionTests { @Override - protected SourceOperator simpleInput(BlockFactory blockFactory, int size) { - return new SequenceBytesRefBlockSourceOperator( - blockFactory, - IntStream.range(0, size).mapToObj(l -> new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean())))) - ); + protected BytesRef randomValue() { + return new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean()))); } @Override @@ -40,10 +27,4 @@ protected AggregatorFunctionSupplier aggregatorFunction(List inputChann protected String expectedDescriptionOfAggregator() { return "top of ips"; } - - @Override - public void assertSimpleOutput(List input, Block result) { - Object[] values = input.stream().flatMap(b -> allBytesRefs(b)).sorted().limit(LIMIT).toArray(Object[]::new); - assertThat((List) BlockUtils.toJavaObject(result, 0), contains(values)); - } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpGroupingAggregatorFunctionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpGroupingAggregatorFunctionTests.java index da55ff2d7aab3..02bf6b667192b 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpGroupingAggregatorFunctionTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpGroupingAggregatorFunctionTests.java @@ -9,36 +9,14 @@ import org.apache.lucene.document.InetAddressPoint; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.compute.data.Block; -import org.elasticsearch.compute.data.BlockFactory; -import org.elasticsearch.compute.data.BlockUtils; -import org.elasticsearch.compute.data.Page; -import org.elasticsearch.compute.operator.LongBytesRefTupleBlockSourceOperator; -import org.elasticsearch.compute.operator.SourceOperator; -import org.elasticsearch.core.Tuple; import org.elasticsearch.xpack.esql.core.type.DataType; import java.util.List; -import java.util.stream.IntStream; - -import static org.hamcrest.Matchers.contains; -import static org.hamcrest.Matchers.equalTo; - -public class TopIpGroupingAggregatorFunctionTests extends GroupingAggregatorFunctionTestCase { - private static final int LIMIT = 100; +public class TopIpGroupingAggregatorFunctionTests extends AbstractTopBytesRefGroupingAggregatorFunctionTests { @Override - protected SourceOperator simpleInput(BlockFactory blockFactory, int size) { - return new LongBytesRefTupleBlockSourceOperator( - blockFactory, - IntStream.range(0, size) - .mapToObj(l -> Tuple.tuple(randomLongBetween(0, 4), new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean()))))) - ); - } - - @Override - protected DataType acceptedDataType() { - return DataType.IP; + protected BytesRef randomValue() { + return new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean()))); } @Override @@ -47,19 +25,12 @@ protected AggregatorFunctionSupplier aggregatorFunction(List inputChann } @Override - protected String expectedDescriptionOfAggregator() { - return "top of ips"; + protected DataType acceptedDataType() { + return DataType.IP; } @Override - protected void assertSimpleGroup(List input, Block result, int position, Long group) { - Object[] values = input.stream().flatMap(b -> allBytesRefs(b, group)).sorted().limit(LIMIT).toArray(Object[]::new); - if (values.length == 0) { - assertThat(result.isNull(position), equalTo(true)); - } else if (values.length == 1) { - assertThat(BlockUtils.toJavaObject(result, position), equalTo(values[0])); - } else { - assertThat((List) BlockUtils.toJavaObject(result, position), contains(values)); - } + protected String expectedDescriptionOfAggregator() { + return "top of ips"; } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/sort/BytesRefBucketedSortTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/sort/BytesRefBucketedSortTests.java new file mode 100644 index 0000000000000..7a4e6658cd646 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/sort/BytesRefBucketedSortTests.java @@ -0,0 +1,79 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.data.sort; + +import org.apache.lucene.document.InetAddressPoint; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.search.sort.SortOrder; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; + +public class BytesRefBucketedSortTests extends BucketedSortTestCase { + @Override + protected BytesRefBucketedSort build(SortOrder sortOrder, int bucketSize) { + BigArrays bigArrays = bigArrays(); + return new BytesRefBucketedSort( + bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST), + "test", + bigArrays, + sortOrder, + bucketSize + ); + } + + @Override + protected BytesRef randomValue() { + return new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean()))); + } + + @Override + protected List threeSortedValues() { + List values = new ArrayList<>(); + values.add(new BytesRef(randomAlphaOfLength(10))); + values.add(new BytesRef(randomAlphaOfLength(11))); + values.add(new BytesRef(randomAlphaOfLength(1))); + Collections.sort(values); + return values; + } + + @Override + protected void collect(BytesRefBucketedSort sort, BytesRef value, int bucket) { + sort.collect(value, bucket); + } + + @Override + protected void merge(BytesRefBucketedSort sort, int groupId, BytesRefBucketedSort other, int otherGroupId) { + sort.merge(groupId, other, otherGroupId); + } + + @Override + protected Block toBlock(BytesRefBucketedSort sort, BlockFactory blockFactory, IntVector selected) { + return sort.toBlock(blockFactory, selected); + } + + @Override + protected void assertBlockTypeAndValues(Block block, List values) { + assertThat(block.elementType(), equalTo(ElementType.BYTES_REF)); + var typedBlock = (BytesRefBlock) block; + var scratch = new BytesRef(); + for (int i = 0; i < values.size(); i++) { + assertThat("expected value on block position " + i, typedBlock.getBytesRef(i, scratch), equalTo(values.get(i))); + } + } +} diff --git a/x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java b/x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java index d0d6d5fa49c42..08b4794b740d6 100644 --- a/x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java +++ b/x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java @@ -72,6 +72,10 @@ public MixedClusterEsqlSpecIT( protected void shouldSkipTest(String testName) throws IOException { super.shouldSkipTest(testName); assumeTrue("Test " + testName + " is skipped on " + bwcVersion, isEnabled(testName, instructions, bwcVersion)); + assumeFalse( + "Skip META tests on mixed version clusters because we change it too quickly", + testCase.requiredCapabilities.contains("meta") + ); if (mode == ASYNC) { assumeTrue("Async is not supported on " + bwcVersion, supportsAsync()); } diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java index 3e799730f7269..8d54dc63598f0 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java @@ -112,6 +112,10 @@ protected void shouldSkipTest(String testName) throws IOException { ); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains("inlinestats")); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains("inlinestats_v2")); + assumeFalse( + "Skip META tests on mixed version clusters because we change it too quickly", + testCase.requiredCapabilities.contains("meta") + ); } private TestFeatureService remoteFeaturesService() throws IOException { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec index 6909f0aeb42f5..2b3fa9dec797d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec @@ -1,5 +1,7 @@ -metaFunctionsSynopsis#[skip:-8.15.99] +metaFunctionsSynopsis required_capability: date_nanos_type +required_capability: meta + meta functions | keep synopsis; synopsis:keyword @@ -118,14 +120,16 @@ double tau() "keyword|text to_upper(str:keyword|text)" "version to_ver(field:keyword|text|version)" "version to_version(field:keyword|text|version)" -"boolean|double|integer|long|date|ip top(field:boolean|double|integer|long|date|ip, limit:integer, order:keyword)" +"boolean|double|integer|long|date|ip|keyword|text top(field:boolean|double|integer|long|date|ip|keyword|text, limit:integer, order:keyword)" "keyword|text trim(string:keyword|text)" "boolean|date|double|integer|ip|keyword|long|text|version values(field:boolean|date|double|integer|ip|keyword|long|text|version)" "double weighted_avg(number:double|integer|long, weight:double|integer|long)" ; -metaFunctionsArgs#[skip:-8.15.99] +metaFunctionsArgs +required_capability: meta required_capability: date_nanos_type + META functions | EVAL name = SUBSTRING(name, 0, 14) | KEEP name, argNames, argTypes, argDescriptions; @@ -246,13 +250,15 @@ to_unsigned_lo|field |"boolean|date|keyword|text|d to_upper |str |"keyword|text" |String expression. If `null`, the function returns `null`. to_ver |field |"keyword|text|version" |Input value. The input can be a single- or multi-valued column or an expression. to_version |field |"keyword|text|version" |Input value. The input can be a single- or multi-valued column or an expression. -top |[field, limit, order] |["boolean|double|integer|long|date|ip", integer, keyword] |[The field to collect the top values for.,The maximum number of values to collect.,The order to calculate the top values. Either `asc` or `desc`.] +top |[field, limit, order] |["boolean|double|integer|long|date|ip|keyword|text", integer, keyword] |[The field to collect the top values for.,The maximum number of values to collect.,The order to calculate the top values. Either `asc` or `desc`.] trim |string |"keyword|text" |String expression. If `null`, the function returns `null`. values |field |"boolean|date|double|integer|ip|keyword|long|text|version" |[""] weighted_avg |[number, weight] |["double|integer|long", "double|integer|long"] |[A numeric value., A numeric weight.] ; -metaFunctionsDescription#[skip:-8.15.99] +metaFunctionsDescription +required_capability: meta + META functions | EVAL name = SUBSTRING(name, 0, 14) | KEEP name, description @@ -380,8 +386,10 @@ values |Returns all values in a group as a multivalued field. The order o weighted_avg |The weighted average of a numeric expression. ; -metaFunctionsRemaining#[skip:-8.15.99] +metaFunctionsRemaining +required_capability: meta required_capability: date_nanos_type + META functions | EVAL name = SUBSTRING(name, 0, 14) | KEEP name, * @@ -504,13 +512,15 @@ to_unsigned_lo|unsigned_long to_upper |"keyword|text" |false |false |false to_ver |version |false |false |false to_version |version |false |false |false -top |"boolean|double|integer|long|date|ip" |[false, false, false] |false |true +top |"boolean|double|integer|long|date|ip|keyword|text" |[false, false, false] |false |true trim |"keyword|text" |false |false |false values |"boolean|date|double|integer|ip|keyword|long|text|version" |false |false |true weighted_avg |"double" |[false, false] |false |true ; -metaFunctionsFiltered#[skip:-8.15.99] +metaFunctionsFiltered +required_capability: meta + META FUNCTIONS | WHERE STARTS_WITH(name, "sin") ; @@ -520,7 +530,9 @@ sin |"double sin(angle:double|integer|long|unsigned_long)" |angle sinh |"double sinh(number:double|integer|long|unsigned_long)" |number |"double|integer|long|unsigned_long" | "Numeric expression. If `null`, the function returns `null`." | double | "Returns the {wikipedia}/Hyperbolic_functions[hyperbolic sine] of a number." | false | false | false ; -countFunctions#[skip:-8.15.99] +countFunctions +required_capability: meta + meta functions | stats a = count(*), b = count(*), c = count(*) | mv_expand c; a:long | b:long | c:long diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec index 86f91adf506d1..80d11425c5bb6 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec @@ -224,3 +224,77 @@ a:ip | b:ip | c:ip | host:keyword [fe82::cae2:65ff:fece:fec0, fe81::cae2:65ff:fece:feb9] | [fe82::cae2:65ff:fece:fec0, fe81::cae2:65ff:fece:feb9] | [fe82::cae2:65ff:fece:fec0, fe81::cae2:65ff:fece:feb9] | epsilon [fe80::cae2:65ff:fece:feb9, fe80::cae2:65ff:fece:feb9] | [fe80::cae2:65ff:fece:feb9, fe80::cae2:65ff:fece:feb9] | [fe81::cae2:65ff:fece:feb9, 127.0.0.3] | gamma ; + +topKeywords +required_capability: agg_top +required_capability: agg_top_string_support + +FROM employees +| EVAL calc = SUBSTRING(last_name, 2) +| STATS + first_name = TOP(first_name, 3, "asc"), + last_name = TOP(calc, 3, "asc"), + evil = TOP(CASE(languages <= 2, first_name, last_name), 3, "desc"); + + first_name:keyword | last_name:keyword | evil:keyword +[Alejandro, Amabile, Anneke] | [acello, addadi, aek] | [Zschoche, Zielinski, Zhongwei] +; + +topKeywordsGrouping +required_capability: agg_top +required_capability: agg_top_string_support + +FROM employees +| EVAL calc = SUBSTRING(last_name, 2) +| STATS + first_name = TOP(first_name, 3, "asc"), + last_name = TOP(calc, 3, "asc"), + evil = TOP(CASE(languages <= 2, first_name, last_name), 3, "desc") + BY job_positions +| SORT job_positions +| LIMIT 3; + + first_name:keyword | last_name:keyword | evil:keyword | job_positions:keyword + [Arumugam, Bojan, Domenick] | [acello, aine, akrucki] | [Zhongwei, Yinghua, Valdiodio] | Accountant +[Alejandro, Charlene, Danel] | [andell, cAlpine, eistad] | [Stamatiou, Sluis, Sidou] | Architect + [Basil, Breannda, Hidefumi] | [aine, alabarba, ierman] | [Tramer, Syrzycki, Stamatiou] | Business Analyst +; + +topText +required_capability: agg_top +required_capability: agg_top_string_support +# we don't need MATCH, but the loader for books.csv is busted in CsvTests +required_capability: match_operator + +FROM books +| EVAL calc = TRIM(SUBSTRING(title, 2, 5)) +| STATS + title = TOP(title, 3, "desc"), + calc = TOP(calc, 3, "asc"), + evil = TOP(CASE(year < 1980, title, author), 3, "desc"); + +title:text | calc:keyword | evil:text +[Worlds of Exile and Illusion: Three Complete Novels of the Hainish Series in One Volume--Rocannon's World, Planet of Exile, City of Illusions, Woman-The Full Story: A Dynamic Celebration of Freedoms, Winter notes on summer impressions] | ["'Bria", "Gent", "HE UN"] | [William Faulkner, William Faulkner, William Faulkner] +; + +topTextGrouping +required_capability: agg_top +required_capability: agg_top_string_support +# we don't need MATCH, but the loader for books.csv is busted in CsvTests +required_capability: match_operator + +FROM books +| EVAL calc = TRIM(SUBSTRING(title, 2, 5)) +| STATS + title = TOP(title, 3, "desc"), + calc = TOP(calc, 3, "asc"), + evil = TOP(CASE(year < 1980, title, author), 3, "desc") + BY author +| SORT author +| LIMIT 3; + + title:text | calc:keyword | evil:text | author:text + A Tolkien Compass: Including J. R. R. Tolkien's Guide to the Names in The Lord of the Rings | Tolk | A Tolkien Compass: Including J. R. R. Tolkien's Guide to the Names in The Lord of the Rings | Agnes Perkins + The Lord of the Rings Poster Collection: Six Paintings by Alan Lee (No. 1) | he Lo | [J. R. R. Tolkien, Alan Lee] | Alan Lee +A Gentle Creature and Other Stories: White Nights, A Gentle Creature, and The Dream of a Ridiculous Man (The World's Classics) | Gent | [W. J. Leatherbarrow, Fyodor Dostoevsky, Alan Myers] | Alan Myers +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 597c349273eb2..31a3096c13cd2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -97,6 +97,11 @@ public enum Cap { */ AGG_TOP_IP_SUPPORT, + /** + * Support for {@code keyword} and {@code text} fields in {@code TOP} aggregation. + */ + AGG_TOP_STRING_SUPPORT, + /** * {@code CASE} properly handling multivalue conditions. */ @@ -251,6 +256,13 @@ public enum Cap { */ MATCH_OPERATOR(true), + /** + * Support for the {@code META} keyword. Tests with this tag are + * intentionally excluded from mixed version clusters because we + * continually add functions, so they constantly fail if we don't. + */ + META, + /** * Add CombineBinaryComparisons rule. */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Top.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Top.java index 4927acc3e1cd9..cb1b0f0cad895 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Top.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Top.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.compute.aggregation.AggregatorFunctionSupplier; import org.elasticsearch.compute.aggregation.TopBooleanAggregatorFunctionSupplier; +import org.elasticsearch.compute.aggregation.TopBytesRefAggregatorFunctionSupplier; import org.elasticsearch.compute.aggregation.TopDoubleAggregatorFunctionSupplier; import org.elasticsearch.compute.aggregation.TopIntAggregatorFunctionSupplier; import org.elasticsearch.compute.aggregation.TopIpAggregatorFunctionSupplier; @@ -48,7 +49,7 @@ public class Top extends AggregateFunction implements ToAggregator, SurrogateExp private static final String ORDER_DESC = "DESC"; @FunctionInfo( - returnType = { "boolean", "double", "integer", "long", "date", "ip" }, + returnType = { "boolean", "double", "integer", "long", "date", "ip", "keyword", "text" }, description = "Collects the top values for a field. Includes repeated values.", isAggregation = true, examples = @Example(file = "stats_top", tag = "top") @@ -57,7 +58,7 @@ public Top( Source source, @Param( name = "field", - type = { "boolean", "double", "integer", "long", "date", "ip" }, + type = { "boolean", "double", "integer", "long", "date", "ip", "keyword", "text" }, description = "The field to collect the top values for." ) Expression field, @Param(name = "limit", type = { "integer" }, description = "The maximum number of values to collect.") Expression limit, @@ -125,12 +126,14 @@ protected TypeResolution resolveType() { dt -> dt == DataType.BOOLEAN || dt == DataType.DATETIME || dt == DataType.IP + || DataType.isString(dt) || (dt.isNumeric() && dt != DataType.UNSIGNED_LONG), sourceText(), FIRST, "boolean", "date", "ip", + "string", "numeric except unsigned_long or counter types" ).and(isNotNullAndFoldable(limitField(), sourceText(), SECOND)) .and(isType(limitField(), dt -> dt == DataType.INTEGER, sourceText(), SECOND, "integer")) @@ -190,6 +193,9 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { if (type == DataType.IP) { return new TopIpAggregatorFunctionSupplier(inputChannels, limitValue(), orderValue()); } + if (DataType.isString(type)) { + return new TopBytesRefAggregatorFunctionSupplier(inputChannels, limitValue(), orderValue()); + } throw EsqlIllegalArgumentException.illegalDataType(type); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java index 60bf4be1d2b03..13ce9ba77cc71 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java @@ -170,7 +170,7 @@ private static Stream, Tuple>> typeAndNames(Class // TODO can't we figure this out from the function itself? types = List.of("Int", "Long", "Double", "Boolean", "BytesRef"); } else if (Top.class.isAssignableFrom(clazz)) { - types = List.of("Boolean", "Int", "Long", "Double", "Ip"); + types = List.of("Boolean", "Int", "Long", "Double", "Ip", "BytesRef"); } else if (Rate.class.isAssignableFrom(clazz)) { types = List.of("Int", "Long", "Double"); } else if (FromPartial.class.isAssignableFrom(clazz) || ToPartial.class.isAssignableFrom(clazz)) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/TopTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/TopTests.java index f64d6a200a031..f7bf338caa099 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/TopTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/TopTests.java @@ -46,7 +46,9 @@ public static Iterable parameters() { MultiRowTestCaseSupplier.doubleCases(1, 1000, -Double.MAX_VALUE, Double.MAX_VALUE, true), MultiRowTestCaseSupplier.dateCases(1, 1000), MultiRowTestCaseSupplier.booleanCases(1, 1000), - MultiRowTestCaseSupplier.ipCases(1, 1000) + MultiRowTestCaseSupplier.ipCases(1, 1000), + MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.KEYWORD), + MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.TEXT) ) .flatMap(List::stream) .map(fieldCaseSupplier -> TopTests.makeSupplier(fieldCaseSupplier, limitCaseSupplier, order)) diff --git a/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/action/TransportFreezeIndexAction.java b/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/action/TransportFreezeIndexAction.java index 39e992b0d103c..83f1677229972 100644 --- a/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/action/TransportFreezeIndexAction.java +++ b/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/action/TransportFreezeIndexAction.java @@ -14,6 +14,7 @@ import org.elasticsearch.action.admin.indices.close.CloseIndexResponse; import org.elasticsearch.action.admin.indices.open.OpenIndexClusterStateUpdateRequest; import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.action.support.DestructiveOperations; import org.elasticsearch.action.support.master.TransportMasterNodeAction; import org.elasticsearch.cluster.AckedClusterStateUpdateTask; @@ -114,9 +115,13 @@ protected void masterOperation(Task task, FreezeRequest request, ClusterState st return; } - final CloseIndexClusterStateUpdateRequest closeRequest = new CloseIndexClusterStateUpdateRequest(task.getId()).ackTimeout( - request.ackTimeout() - ).masterNodeTimeout(request.masterNodeTimeout()).indices(concreteIndices); + final CloseIndexClusterStateUpdateRequest closeRequest = new CloseIndexClusterStateUpdateRequest( + request.masterNodeTimeout(), + request.ackTimeout(), + task.getId(), + ActiveShardCount.DEFAULT, + concreteIndices + ); indexStateService.closeIndices(closeRequest, new ActionListener<>() { @Override @@ -145,10 +150,12 @@ private void toggleFrozenSettings( submitUnbatchedTask( "toggle-frozen-settings", new AckedClusterStateUpdateTask(Priority.URGENT, request, listener.delegateFailure((delegate, acknowledgedResponse) -> { - OpenIndexClusterStateUpdateRequest updateRequest = new OpenIndexClusterStateUpdateRequest().ackTimeout(request.ackTimeout()) - .masterNodeTimeout(request.masterNodeTimeout()) - .indices(concreteIndices) - .waitForActiveShards(request.waitForActiveShards()); + OpenIndexClusterStateUpdateRequest updateRequest = new OpenIndexClusterStateUpdateRequest( + request.masterNodeTimeout(), + request.ackTimeout(), + request.waitForActiveShards(), + concreteIndices + ); indexStateService.openIndices( updateRequest, delegate.safeMap( diff --git a/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/actions/SearchableSnapshotActionIT.java b/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/actions/SearchableSnapshotActionIT.java index 0e3d0f1b2ec40..fefeaa95319ed 100644 --- a/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/actions/SearchableSnapshotActionIT.java +++ b/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/actions/SearchableSnapshotActionIT.java @@ -48,6 +48,7 @@ import java.util.concurrent.TimeUnit; import static java.util.Collections.singletonMap; +import static org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; import static org.elasticsearch.xpack.TimeSeriesRestDriver.createComposableTemplate; import static org.elasticsearch.xpack.TimeSeriesRestDriver.createNewSingletonPolicy; @@ -921,6 +922,61 @@ public void testSearchableSnapshotInvokesAsyncActionOnNewIndex() throws Exceptio }, 30, TimeUnit.SECONDS); } + public void testSearchableSnapshotTotalShardsPerNode() throws Exception { + String index = "myindex-" + randomAlphaOfLength(4).toLowerCase(Locale.ROOT); + Integer totalShardsPerNode = 2; + createSnapshotRepo(client(), snapshotRepo, randomBoolean()); + createPolicy( + client(), + policy, + null, + null, + new Phase( + "cold", + TimeValue.ZERO, + singletonMap(SearchableSnapshotAction.NAME, new SearchableSnapshotAction(snapshotRepo, randomBoolean())) + ), + new Phase( + "frozen", + TimeValue.ZERO, + singletonMap(SearchableSnapshotAction.NAME, new SearchableSnapshotAction(snapshotRepo, randomBoolean(), totalShardsPerNode)) + ), + null + ); + + createIndex(index, Settings.EMPTY); + ensureGreen(index); + indexDocument(client(), index, true); + + // enable ILM after we indexed a document as otherwise ILM might sometimes run so fast the indexDocument call will fail with + // `index_not_found_exception` + updateIndexSettings(index, Settings.builder().put(LifecycleSettings.LIFECYCLE_NAME, policy)); + + // wait for snapshot successfully mounted and ILM execution completed + final String searchableSnapMountedIndexName = SearchableSnapshotAction.PARTIAL_RESTORED_INDEX_PREFIX + + SearchableSnapshotAction.FULL_RESTORED_INDEX_PREFIX + index; + assertBusy(() -> { + logger.info("--> waiting for [{}] to exist...", searchableSnapMountedIndexName); + assertTrue(indexExists(searchableSnapMountedIndexName)); + }, 30, TimeUnit.SECONDS); + assertBusy(() -> { + triggerStateChange(); + Step.StepKey stepKeyForIndex = getStepKeyForIndex(client(), searchableSnapMountedIndexName); + assertThat(stepKeyForIndex.phase(), is("frozen")); + assertThat(stepKeyForIndex.name(), is(PhaseCompleteStep.NAME)); + }, 30, TimeUnit.SECONDS); + + // validate total_shards_per_node setting + Map indexSettings = getIndexSettingsAsMap(searchableSnapMountedIndexName); + assertNotNull("expected total_shards_per_node to exist", indexSettings.get(INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey())); + Integer snapshotTotalShardsPerNode = Integer.valueOf((String) indexSettings.get(INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey())); + assertEquals( + "expected total_shards_per_node to be " + totalShardsPerNode + ", but got: " + snapshotTotalShardsPerNode, + snapshotTotalShardsPerNode, + totalShardsPerNode + ); + } + /** * Cause a bit of cluster activity using an empty reroute call in case the `wait-for-index-colour` ILM step missed the * notification that partial-index is now GREEN. diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/cluster/metadata/SemanticTextClusterMetadataTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/cluster/metadata/SemanticTextClusterMetadataTests.java index 1c4a2f561ad4a..bfec2d5ac3484 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/cluster/metadata/SemanticTextClusterMetadataTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/cluster/metadata/SemanticTextClusterMetadataTests.java @@ -11,7 +11,6 @@ import org.elasticsearch.action.admin.indices.mapping.put.PutMappingClusterStateUpdateRequest; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.cluster.service.ClusterStateTaskExecutorUtils; -import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexService; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; @@ -46,9 +45,14 @@ public void testSingleSourceSemanticTextField() throws Exception { final MetadataMappingService.PutMappingExecutor putMappingExecutor = mappingService.new PutMappingExecutor(); final ClusterService clusterService = getInstanceFromNode(ClusterService.class); - final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest(""" - { "properties": { "field": { "type": "semantic_text", "inference_id": "test_model" }}}"""); - request.indices(new Index[] { indexService.index() }); + final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest( + TEST_REQUEST_TIMEOUT, + TEST_REQUEST_TIMEOUT, + """ + { "properties": { "field": { "type": "semantic_text", "inference_id": "test_model" }}}""", + false, + indexService.index() + ); final var resultingState = ClusterStateTaskExecutorUtils.executeAndAssertSuccessful( clusterService.state(), putMappingExecutor, @@ -63,25 +67,30 @@ public void testCopyToSemanticTextField() throws Exception { final MetadataMappingService.PutMappingExecutor putMappingExecutor = mappingService.new PutMappingExecutor(); final ClusterService clusterService = getInstanceFromNode(ClusterService.class); - final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest(""" - { - "properties": { - "semantic": { - "type": "semantic_text", - "inference_id": "test_model" - }, - "copy_origin_1": { - "type": "text", - "copy_to": "semantic" - }, - "copy_origin_2": { - "type": "text", - "copy_to": "semantic" + final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest( + TEST_REQUEST_TIMEOUT, + TEST_REQUEST_TIMEOUT, + """ + { + "properties": { + "semantic": { + "type": "semantic_text", + "inference_id": "test_model" + }, + "copy_origin_1": { + "type": "text", + "copy_to": "semantic" + }, + "copy_origin_2": { + "type": "text", + "copy_to": "semantic" + } + } } - } - } - """); - request.indices(new Index[] { indexService.index() }); + """, + false, + indexService.index() + ); final var resultingState = ClusterStateTaskExecutorUtils.executeAndAssertSuccessful( clusterService.state(), putMappingExecutor, diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java index 34ef0baecccc5..4e92cad1026a3 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java @@ -1120,6 +1120,28 @@ public void testStartMultipleLowPriorityDeployments() throws Exception { } } + @SuppressWarnings("unchecked") + public void testDeploymentThreadsIncludedInUsage() throws IOException { + String modelId = "deployment_threads_in_usage"; + createPassThroughModel(modelId); + putModelDefinition(modelId); + putVocabulary(List.of("these", "are", "my", "words"), modelId); + startDeployment(modelId); + + Request request = new Request("GET", "/_xpack/usage"); + var usage = entityAsMap(client().performRequest(request).getEntity()); + + var ml = (Map) usage.get("ml"); + assertNotNull(usage.toString(), ml); + var inference = (Map) ml.get("inference"); + var deployments = (Map) inference.get("deployments"); + var deploymentStats = (List>) deployments.get("stats_by_model"); + for (var stat : deploymentStats) { + assertThat(stat.toString(), (Integer) stat.get("num_threads"), greaterThanOrEqualTo(1)); + assertThat(stat.toString(), (Integer) stat.get("num_allocations"), greaterThanOrEqualTo(1)); + } + } + private void putModelDefinition(String modelId) throws IOException { putModelDefinition(modelId, BASE_64_ENCODED_MODEL, RAW_MODEL_SIZE); } diff --git a/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/MlUsageIT.java b/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/MlUsageIT.java new file mode 100644 index 0000000000000..05a307c2dfad3 --- /dev/null +++ b/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/MlUsageIT.java @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.ml.integration; + +import org.elasticsearch.client.Request; +import org.elasticsearch.test.rest.ESRestTestCase; + +import java.io.IOException; +import java.util.Map; + +import static org.hamcrest.Matchers.greaterThanOrEqualTo; + +// Test the phone home/telemetry data +public class MlUsageIT extends ESRestTestCase { + + @SuppressWarnings("unchecked") + public void testMLUsage() throws IOException { + Request request = new Request("GET", "/_xpack/usage"); + var usage = entityAsMap(client().performRequest(request).getEntity()); + + var ml = (Map) usage.get("ml"); + assertNotNull(usage.toString(), ml); + var memoryUsage = (Map) ml.get("memory"); + assertNotNull(ml.toString(), memoryUsage); + assertThat(memoryUsage.toString(), (Integer) memoryUsage.get("anomaly_detectors_memory_bytes"), greaterThanOrEqualTo(0)); + assertThat(memoryUsage.toString(), (Integer) memoryUsage.get("data_frame_analytics_memory_bytes"), greaterThanOrEqualTo(0)); + assertThat(memoryUsage.toString(), (Integer) memoryUsage.get("pytorch_inference_memory_bytes"), greaterThanOrEqualTo(0)); + assertThat(memoryUsage.toString(), (Integer) memoryUsage.get("total_used_memory_bytes"), greaterThanOrEqualTo(0)); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningUsageTransportAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningUsageTransportAction.java index 583965e76e542..40e3fbb661db1 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningUsageTransportAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningUsageTransportAction.java @@ -39,6 +39,7 @@ import org.elasticsearch.xpack.core.ml.action.GetJobsStatsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsStatsAction; +import org.elasticsearch.xpack.core.ml.action.MlMemoryAction; import org.elasticsearch.xpack.core.ml.datafeed.DatafeedState; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsState; @@ -65,6 +66,7 @@ import java.util.Map; import java.util.Objects; import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.stream.Collectors; @@ -72,16 +74,20 @@ public class MachineLearningUsageTransportAction extends XPackUsageFeatureTransportAction { - private static class ModelStats { + private static class DeploymentStats { private final String modelId; private final String taskType; private final StatsAccumulator inferenceCounts = new StatsAccumulator(); private Instant lastAccess; + private final int numThreads; + private final int numAllocations; - ModelStats(String modelId, String taskType) { + DeploymentStats(String modelId, String taskType, int numThreads, int numAllocations) { this.modelId = modelId; this.taskType = taskType; + this.numThreads = numThreads; + this.numAllocations = numAllocations; } void update(AssignmentStats.NodeStats stats) { @@ -95,6 +101,8 @@ Map asMap() { Map result = new HashMap<>(); result.put("model_id", modelId); result.put("task_type", taskType); + result.put("num_allocations", numAllocations); + result.put("num_threads", numThreads); result.put("inference_counts", inferenceCounts.asMap()); if (lastAccess != null) { result.put("last_access", lastAccess.toString()); @@ -158,6 +166,7 @@ protected void masterOperation( Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), + Collections.emptyMap(), 0 ); listener.onResponse(new XPackUsageFeatureResponse(usage)); @@ -167,11 +176,14 @@ protected void masterOperation( Map jobsUsage = new LinkedHashMap<>(); Map datafeedsUsage = new LinkedHashMap<>(); Map analyticsUsage = new LinkedHashMap<>(); + AtomicReference> inferenceUsage = new AtomicReference<>(Map.of()); + int nodeCount = mlNodeCount(state); - // Step 5. return final ML usage - ActionListener> inferenceUsageListener = ActionListener.wrap( - inferenceUsage -> listener.onResponse( + // Step 6. return final ML usage + ActionListener memoryUsageListener = ActionListener.wrap(memoryResponse -> { + var memoryUsage = extractMemoryUsage(memoryResponse); + listener.onResponse( new XPackUsageFeatureResponse( new MachineLearningFeatureSetUsage( MachineLearningField.ML_API_FEATURE.checkWithoutTracking(licenseState), @@ -179,28 +191,38 @@ protected void masterOperation( jobsUsage, datafeedsUsage, analyticsUsage, - inferenceUsage, + inferenceUsage.get(), + memoryUsage, nodeCount ) ) - ), - e -> { - logger.warn("Failed to get inference usage to include in ML usage", e); - listener.onResponse( - new XPackUsageFeatureResponse( - new MachineLearningFeatureSetUsage( - MachineLearningField.ML_API_FEATURE.checkWithoutTracking(licenseState), - enabled, - jobsUsage, - datafeedsUsage, - analyticsUsage, - Collections.emptyMap(), - nodeCount - ) + ); + }, e -> { + logger.warn("Failed to get memory usage to include in ML usage", e); + listener.onResponse( + new XPackUsageFeatureResponse( + new MachineLearningFeatureSetUsage( + MachineLearningField.ML_API_FEATURE.checkWithoutTracking(licenseState), + enabled, + jobsUsage, + datafeedsUsage, + analyticsUsage, + inferenceUsage.get(), + Collections.emptyMap(), + nodeCount ) - ); - } - ); + ) + ); + }); + + // Step 5. Get + ActionListener> inferenceUsageListener = ActionListener.wrap(inference -> { + inferenceUsage.set(inference); + client.execute(MlMemoryAction.INSTANCE, new MlMemoryAction.Request("_all"), memoryUsageListener); + }, e -> { + logger.warn("Failed to get inference usage to include in ML usage", e); + client.execute(MlMemoryAction.INSTANCE, new MlMemoryAction.Request("_all"), memoryUsageListener); + }); // Step 4. Extract usage from data frame analytics configs and then get inference usage ActionListener dataframeAnalyticsListener = ActionListener.wrap(response -> { @@ -464,7 +486,7 @@ private static void addDeploymentStats( int deploymentsCount = 0; double avgTimeSum = 0.0; StatsAccumulator nodeDistribution = new StatsAccumulator(); - Map statsByModel = new TreeMap<>(); + Map statsByModel = new TreeMap<>(); for (var stats : statsResponse.getResources().results()) { AssignmentStats deploymentStats = stats.getDeploymentStats(); if (deploymentStats == null) { @@ -478,7 +500,15 @@ private static void addDeploymentStats( String modelId = deploymentStats.getModelId(); String taskType = taskTypes.get(deploymentStats.getModelId()); String mapKey = modelId + ":" + taskType; - ModelStats modelStats = statsByModel.computeIfAbsent(mapKey, key -> new ModelStats(modelId, taskType)); + DeploymentStats modelStats = statsByModel.computeIfAbsent( + mapKey, + key -> new DeploymentStats( + modelId, + taskType, + deploymentStats.getThreadsPerAllocation(), + deploymentStats.getNumberOfAllocations() + ) + ); for (var nodeStats : deploymentStats.getNodeStats()) { long nodeInferenceCount = nodeStats.getInferenceCount().orElse(0L); avgTimeSum += nodeStats.getAvgInferenceTime().orElse(0.0) * nodeInferenceCount; @@ -499,7 +529,7 @@ private static void addDeploymentStats( "inference_counts", nodeDistribution.asMap(), "stats_by_model", - statsByModel.values().stream().map(ModelStats::asMap).collect(Collectors.toList()) + statsByModel.values().stream().map(DeploymentStats::asMap).collect(Collectors.toList()) ) ); } @@ -590,6 +620,21 @@ private static void addInferenceIngestUsage(GetTrainedModelsStatsAction.Response inferenceUsage.put("ingest_processors", Collections.singletonMap(MachineLearningFeatureSetUsage.ALL, ingestUsage)); } + private static Map extractMemoryUsage(MlMemoryAction.Response memoryResponse) { + var adMem = memoryResponse.getNodes().stream().mapToLong(mem -> mem.getMlAnomalyDetectors().getBytes()).sum(); + var dfaMem = memoryResponse.getNodes().stream().mapToLong(mem -> mem.getMlDataFrameAnalytics().getBytes()).sum(); + var pytorchMem = memoryResponse.getNodes().stream().mapToLong(mem -> mem.getMlNativeInference().getBytes()).sum(); + var nativeOverheadMem = memoryResponse.getNodes().stream().mapToLong(mem -> mem.getMlNativeCodeOverhead().getBytes()).sum(); + long totalUsedMem = adMem + dfaMem + pytorchMem + nativeOverheadMem; + + var memoryUsage = new LinkedHashMap(); + memoryUsage.put("anomaly_detectors_memory_bytes", adMem); + memoryUsage.put("data_frame_analytics_memory_bytes", dfaMem); + memoryUsage.put("pytorch_inference_memory_bytes", pytorchMem); + memoryUsage.put("total_used_memory_bytes", totalUsedMem); + return memoryUsage; + } + private static Map getMinMaxSumAsLongsFromStats(StatsAccumulator stats) { Map asMap = Maps.newMapWithExpectedSize(3); asMap.put("sum", Double.valueOf(stats.getTotal()).longValue()); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java index e5575abfeb020..4fdb7d2e5e46c 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java @@ -10,9 +10,11 @@ import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.node.DiscoveryNodeUtils; import org.elasticsearch.cluster.node.DiscoveryNodes; @@ -46,6 +48,7 @@ import org.elasticsearch.xpack.core.ml.action.GetJobsStatsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsStatsAction; +import org.elasticsearch.xpack.core.ml.action.MlMemoryAction; import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig; import org.elasticsearch.xpack.core.ml.datafeed.DatafeedState; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; @@ -134,6 +137,27 @@ public void init() { new QueryPage<>(Collections.emptyList(), 0, GetTrainedModelsStatsAction.Response.RESULTS_FIELD) ) ); + givenMlMemory( + new MlMemoryAction.Response( + new ClusterName("cluster_foo"), + List.of( + new MlMemoryAction.Response.MlMemoryStats( + mock(DiscoveryNode.class), + ByteSizeValue.ofBytes(100L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(20L), + ByteSizeValue.ofBytes(30L), + ByteSizeValue.ofBytes(40L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L) + ) + ), + List.of() + ) + ); } @After @@ -343,6 +367,8 @@ public void testUsage() throws Exception { assertThat(source.getValue("inference.deployments.inference_counts.avg"), equalTo(4.0)); assertThat(source.getValue("inference.deployments.stats_by_model.0.model_id"), equalTo("model_3")); assertThat(source.getValue("inference.deployments.stats_by_model.0.task_type"), equalTo("ner")); + assertThat(source.getValue("inference.deployments.stats_by_model.0.num_allocations"), equalTo(8)); + assertThat(source.getValue("inference.deployments.stats_by_model.0.num_threads"), equalTo(1)); assertThat(source.getValue("inference.deployments.stats_by_model.0.last_access"), equalTo(lastAccess(3).toString())); assertThat(source.getValue("inference.deployments.stats_by_model.0.inference_counts.total"), equalTo(3.0)); assertThat(source.getValue("inference.deployments.stats_by_model.0.inference_counts.min"), equalTo(3.0)); @@ -350,6 +376,8 @@ public void testUsage() throws Exception { assertThat(source.getValue("inference.deployments.stats_by_model.0.inference_counts.avg"), equalTo(3.0)); assertThat(source.getValue("inference.deployments.stats_by_model.1.model_id"), equalTo("model_4")); assertThat(source.getValue("inference.deployments.stats_by_model.1.task_type"), equalTo("text_expansion")); + assertThat(source.getValue("inference.deployments.stats_by_model.1.num_allocations"), equalTo(2)); + assertThat(source.getValue("inference.deployments.stats_by_model.1.num_threads"), equalTo(2)); assertThat(source.getValue("inference.deployments.stats_by_model.1.last_access"), equalTo(lastAccess(44).toString())); assertThat(source.getValue("inference.deployments.stats_by_model.1.inference_counts.total"), equalTo(9.0)); assertThat(source.getValue("inference.deployments.stats_by_model.1.inference_counts.min"), equalTo(4.0)); @@ -360,6 +388,11 @@ public void testUsage() throws Exception { assertThat(source.getValue("inference.deployments.model_sizes_bytes.max"), equalTo(1000.0)); assertThat(source.getValue("inference.deployments.model_sizes_bytes.avg"), equalTo(650.0)); assertThat(source.getValue("inference.deployments.time_ms.avg"), closeTo(44.0, 1e-10)); + + assertThat(source.getValue("memory.anomaly_detectors_memory_bytes"), equalTo(20)); + assertThat(source.getValue("memory.data_frame_analytics_memory_bytes"), equalTo(30)); + assertThat(source.getValue("memory.pytorch_inference_memory_bytes"), equalTo(40)); + assertThat(source.getValue("memory.total_used_memory_bytes"), equalTo(91)); } } @@ -566,6 +599,8 @@ public void testUsageWithOrphanedTask() throws Exception { Job closed1 = buildJob("closed1", Arrays.asList(buildMinDetector("foo"), buildMinDetector("bar"), buildMinDetector("foobar"))); GetJobsStatsAction.Response.JobStats closed1JobStats = buildJobStats("closed1", JobState.CLOSED, 300L, 0); givenJobs(Arrays.asList(opened1, closed1), Arrays.asList(opened1JobStats, opened2JobStats, closed1JobStats)); + MlMemoryAction.Response memory = new MlMemoryAction.Response(new ClusterName("foo"), List.of(), List.of()); + givenMlMemory(memory); var usageAction = newUsageAction(settings.build(), true, true, true); PlainActionFuture future = new PlainActionFuture<>(); @@ -590,6 +625,11 @@ public void testUsageWithOrphanedTask() throws Exception { assertThat(source.getValue("jobs._all.model_size.avg"), equalTo(200.0)); assertThat(source.getValue("jobs._all.created_by.a_cool_module"), equalTo(1)); assertThat(source.getValue("jobs._all.created_by.unknown"), equalTo(1)); + + assertThat(source.getValue("memory.anomaly_detectors_memory_bytes"), equalTo(0)); + assertThat(source.getValue("memory.data_frame_analytics_memory_bytes"), equalTo(0)); + assertThat(source.getValue("memory.pytorch_inference_memory_bytes"), equalTo(0)); + assertThat(source.getValue("memory.total_used_memory_bytes"), equalTo(0)); } public void testUsageDisabledML() throws Exception { @@ -802,6 +842,15 @@ private void givenTrainedModelStats(GetTrainedModelsStatsAction.Response trained }).when(client).execute(same(GetTrainedModelsStatsAction.INSTANCE), any(), any()); } + private void givenMlMemory(MlMemoryAction.Response memoryUsage) { + doAnswer(invocationOnMock -> { + @SuppressWarnings("unchecked") + ActionListener listener = (ActionListener) invocationOnMock.getArguments()[2]; + listener.onResponse(memoryUsage); + return Void.TYPE; + }).when(client).execute(same(MlMemoryAction.INSTANCE), any(), any()); + } + private static Detector buildMinDetector(String fieldName) { Detector.Builder detectorBuilder = new Detector.Builder(); detectorBuilder.setFunction("min"); @@ -1004,8 +1053,8 @@ private Map setupComplexMocks() { new AssignmentStats( "deployment_3", "model_3", - null, - null, + 1, + 8, null, null, null, @@ -1111,6 +1160,29 @@ private Map setupComplexMocks() { ) ) ); + + givenMlMemory( + new MlMemoryAction.Response( + new ClusterName("cluster_foo"), + List.of( + new MlMemoryAction.Response.MlMemoryStats( + mock(DiscoveryNode.class), + ByteSizeValue.ofBytes(100L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(20L), + ByteSizeValue.ofBytes(30L), + ByteSizeValue.ofBytes(40L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L) + ) + ), + List.of() + ) + ); + return expectedDfaCountByAnalysis; } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/20_ignore_above_stored_source.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/20_ignore_above_stored_source.yml new file mode 100644 index 0000000000000..252bafbdbe15a --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/20_ignore_above_stored_source.yml @@ -0,0 +1,56 @@ +--- +wildcard field type ignore_above: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + a_wildcard: + type: wildcard + b_wildcard: + type: wildcard + ignore_above: 20 + c_wildcard: + type: wildcard + d_wildcard: + type: wildcard + ignore_above: 5 + + + + - do: + index: + index: test + refresh: true + id: "1" + body: { "a_wildcard": "foo bar", "b_wildcard": "the quick brown", "c_wildcard": ["foo", "bar", "jumps over the lazy dog"], "d_wildcard": ["foo", "bar", "the quick"]} + + - do: + search: + body: + fields: + - a_wildcard + - b_wildcard + - c_wildcard + - d_wildcard + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.a_wildcard: "foo bar" } + - match: { hits.hits.0._source.b_wildcard: "the quick brown" } + - match: { hits.hits.0._source.c_wildcard: ["foo", "bar", "jumps over the lazy dog"] } + - match: { hits.hits.0._source.d_wildcard: ["foo", "bar", "the quick"] } + - match: { hits.hits.0.fields.a_wildcard.0: "foo bar" } + - match: { hits.hits.0.fields.b_wildcard.0: "the quick brown" } + - match: { hits.hits.0.fields.c_wildcard: ["foo", "bar"] } + - match: { hits.hits.0.fields.d_wildcard: ["foo", "bar"] } + diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/30_ignore_above_synthetic_source.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/30_ignore_above_synthetic_source.yml new file mode 100644 index 0000000000000..f5c9f3d92369a --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/30_ignore_above_synthetic_source.yml @@ -0,0 +1,58 @@ +--- +wildcard field type ignore_above: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + a_wildcard: + type: wildcard + b_wildcard: + type: wildcard + ignore_above: 20 + c_wildcard: + type: wildcard + d_wildcard: + type: wildcard + ignore_above: 5 + + + + - do: + index: + index: test + refresh: true + id: "1" + body: { "a_wildcard": "foo bar", "b_wildcard": "the quick brown", "c_wildcard": ["foo", "bar", "jumps over the lazy dog"], "d_wildcard": ["foo", "bar", "the quick"]} + + - do: + search: + body: + fields: + - a_wildcard + - b_wildcard + - c_wildcard + - d_wildcard + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.a_wildcard: "foo bar" } + - match: { hits.hits.0._source.b_wildcard: "the quick brown" } + - match: { hits.hits.0._source.c_wildcard: ["bar", "foo"] } + - match: { hits.hits.0._source.d_wildcard: ["bar", "foo", "the quick"] } + - match: { hits.hits.0.fields.a_wildcard.0: "foo bar" } + - match: { hits.hits.0.fields.b_wildcard.0: "the quick brown" } + - match: { hits.hits.0.fields.c_wildcard: ["bar", "foo"] } + - match: { hits.hits.0.fields.d_wildcard: ["bar", "foo"] } + diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java index 8e4f56e299587..1e97e64371586 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java @@ -87,6 +87,8 @@ import java.util.Map; import java.util.Set; +import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING; + /** * A {@link FieldMapper} for indexing fields with ngrams for efficient wildcard matching */ @@ -191,7 +193,6 @@ public static class Defaults { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER ); - public static final int IGNORE_ABOVE = Integer.MAX_VALUE; } private static WildcardFieldMapper toType(FieldMapper in) { @@ -200,21 +201,28 @@ private static WildcardFieldMapper toType(FieldMapper in) { public static class Builder extends FieldMapper.Builder { - final Parameter ignoreAbove = Parameter.intParam("ignore_above", true, m -> toType(m).ignoreAbove, Defaults.IGNORE_ABOVE) - .addValidator(v -> { - if (v < 0) { - throw new IllegalArgumentException("[ignore_above] must be positive, got [" + v + "]"); - } - }); + final Parameter ignoreAbove; final Parameter nullValue = Parameter.stringParam("null_value", false, m -> toType(m).nullValue, null).acceptsNull(); final Parameter> meta = Parameter.metaParam(); final IndexVersion indexVersionCreated; - public Builder(String name, IndexVersion indexVersionCreated) { + final int ignoreAboveDefault; + + public Builder(final String name, IndexVersion indexVersionCreated) { + this(name, Integer.MAX_VALUE, indexVersionCreated); + } + + private Builder(String name, int ignoreAboveDefault, IndexVersion indexVersionCreated) { super(name); this.indexVersionCreated = indexVersionCreated; + this.ignoreAboveDefault = ignoreAboveDefault; + this.ignoreAbove = Parameter.intParam("ignore_above", true, m -> toType(m).ignoreAbove, ignoreAboveDefault).addValidator(v -> { + if (v < 0) { + throw new IllegalArgumentException("[ignore_above] must be positive, got [" + v + "]"); + } + }); } @Override @@ -236,23 +244,18 @@ Builder nullValue(String nullValue) { public WildcardFieldMapper build(MapperBuilderContext context) { return new WildcardFieldMapper( leafName(), - new WildcardFieldType( - context.buildFullName(leafName()), - nullValue.get(), - ignoreAbove.get(), - indexVersionCreated, - meta.get() - ), - ignoreAbove.get(), + new WildcardFieldType(context.buildFullName(leafName()), indexVersionCreated, meta.get(), this), context.isSourceSynthetic(), builderParams(this, context), - nullValue.get(), - indexVersionCreated + indexVersionCreated, + this ); } } - public static TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated())); + public static TypeParser PARSER = new TypeParser( + (n, c) -> new Builder(n, IGNORE_ABOVE_SETTING.get(c.getSettings()), c.indexVersionCreated()) + ); public static final char TOKEN_START_OR_END_CHAR = 0; public static final String TOKEN_START_STRING = Character.toString(TOKEN_START_OR_END_CHAR); @@ -263,18 +266,18 @@ public static final class WildcardFieldType extends MappedFieldType { static Analyzer lowercaseNormalizer = new LowercaseNormalizer(); private final String nullValue; - private final int ignoreAbove; private final NamedAnalyzer analyzer; + private final int ignoreAbove; - private WildcardFieldType(String name, String nullValue, int ignoreAbove, IndexVersion version, Map meta) { + private WildcardFieldType(String name, IndexVersion version, Map meta, Builder builder) { super(name, true, false, true, Defaults.TEXT_SEARCH_INFO, meta); if (version.onOrAfter(IndexVersions.V_7_10_0)) { this.analyzer = WILDCARD_ANALYZER_7_10; } else { this.analyzer = WILDCARD_ANALYZER_7_9; } - this.nullValue = nullValue; - this.ignoreAbove = ignoreAbove; + this.nullValue = builder.nullValue.getValue(); + this.ignoreAbove = builder.ignoreAbove.getValue(); } @Override @@ -889,26 +892,27 @@ protected String parseSourceValue(Object value) { NGRAM_FIELD_TYPE = freezeAndDeduplicateFieldType(ft); assert NGRAM_FIELD_TYPE.indexOptions() == IndexOptions.DOCS; } - - private final int ignoreAbove; private final String nullValue; private final IndexVersion indexVersionCreated; + + private final int ignoreAbove; + private final int ignoreAboveDefault; private final boolean storeIgnored; private WildcardFieldMapper( String simpleName, WildcardFieldType mappedFieldType, - int ignoreAbove, boolean storeIgnored, BuilderParams builderParams, - String nullValue, - IndexVersion indexVersionCreated + IndexVersion indexVersionCreated, + Builder builder ) { super(simpleName, mappedFieldType, builderParams); - this.nullValue = nullValue; - this.ignoreAbove = ignoreAbove; + this.nullValue = builder.nullValue.getValue(); this.storeIgnored = storeIgnored; this.indexVersionCreated = indexVersionCreated; + this.ignoreAbove = builder.ignoreAbove.getValue(); + this.ignoreAboveDefault = builder.ignoreAboveDefault; } @Override @@ -983,14 +987,14 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexVersionCreated).init(this); + return new Builder(leafName(), ignoreAboveDefault, indexVersionCreated).init(this); } @Override protected SyntheticSourceSupport syntheticSourceSupport() { var layers = new ArrayList(); layers.add(new WildcardSyntheticFieldLoader()); - if (ignoreAbove != Defaults.IGNORE_ABOVE) { + if (ignoreAbove != ignoreAboveDefault) { layers.add(new CompositeSyntheticFieldLoader.StoredFieldLayer(originalName()) { @Override protected void writeValue(Object value, XContentBuilder b) throws IOException { diff --git a/x-pack/qa/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/test/CoreTestTranslater.java b/x-pack/qa/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/test/CoreTestTranslater.java index 2bea4bb247d8f..d34303ea803d6 100644 --- a/x-pack/qa/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/test/CoreTestTranslater.java +++ b/x-pack/qa/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/test/CoreTestTranslater.java @@ -222,10 +222,32 @@ public boolean modifySections(List executables) { */ protected abstract boolean modifySearch(ApiCallSection search); + private static Object getSetting(final Object map, final String... keys) { + Map current = (Map) map; + for (final String key : keys) { + if (current != null) { + current = (Map) current.get(key); + } else { + return null; + } + } + return current; + } + private boolean modifyCreateIndex(ApiCallSection createIndex) { String index = createIndex.getParams().get("index"); for (Map body : createIndex.getBodies()) { - Object settings = body.get("settings"); + final Object settings = body.get("settings"); + final Object indexMapping = getSetting(settings, "index", "mapping"); + if (indexMapping instanceof Map m) { + final Object ignoreAbove = m.get("ignore_above"); + if (ignoreAbove instanceof Integer ignoreAboveValue) { + if (ignoreAboveValue >= 0) { + // Scripts don't support ignore_above so we skip those fields + continue; + } + } + } if (settings instanceof Map && ((Map) settings).containsKey("sort.field")) { /* * You can't sort the index on a runtime field